Skip to content

Commit

Permalink
[RISCV][GISel] Implement canLowerReturn. (llvm#105465)
Browse files Browse the repository at this point in the history
This allows us to handle return values that are too large to fit in x10
and x11. They will be converted to an sret by passing a pointer to where
to store the return value.
  • Loading branch information
topperc authored and dmpolukhin committed Sep 2, 2024
1 parent e44c983 commit 735d136
Show file tree
Hide file tree
Showing 4 changed files with 363 additions and 30 deletions.
87 changes: 57 additions & 30 deletions llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"

Expand Down Expand Up @@ -360,13 +361,7 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
// lowerCall.
static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget,
bool IsLowerRetVal = false) {
// TODO: Integers larger than 2*XLen are passed indirectly which is not
// supported yet.
if (T->isIntegerTy())
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
if (T->isIntegerTy() || T->isFloatingPointTy() || T->isPointerTy())
return true;

if (T->isArrayTy())
Expand Down Expand Up @@ -394,10 +389,13 @@ bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
assert(!Val == VRegs.empty() && "Return value without a vreg");
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(RISCV::PseudoRET);

if (!VRegs.empty()) {
if (!FLI.CanLowerReturn) {
insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
} else if (!VRegs.empty()) {
const RISCVSubtarget &Subtarget =
MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
if (!isSupportedReturnType(Val->getType(), Subtarget, /*IsLowerRetVal=*/true))
if (!isSupportedReturnType(Val->getType(), Subtarget,
/*IsLowerRetVal=*/true))
return false;

MachineFunction &MF = MIRBuilder.getMF();
Expand All @@ -418,14 +416,38 @@ bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
/*IsRet=*/true, Dispatcher);
RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret);
if (!determineAndHandleAssignments(Handler, Assigner, SplitRetInfos,
MIRBuilder, CC, F.isVarArg()))
MIRBuilder, CC, F.isVarArg()))
return false;
}

MIRBuilder.insertInstr(Ret);
return true;
}

// GlobalISel counterpart of SelectionDAG's CanLowerReturn hook: dry-run the
// RISC-V return calling convention over the split return values and report
// whether every piece can be assigned. When this returns false the return is
// demoted to an sret pointer argument (see the !FLI.CanLowerReturn /
// !Info.CanLowerReturn paths in lowerReturn/lowerFormalArguments/lowerCall).
bool RISCVCallLowering::canLowerReturn(MachineFunction &MF,
CallingConv::ID CallConv,
SmallVectorImpl<BaseArgInfo> &Outs,
bool IsVarArg) const {
// CCState accumulates register/stack assignments; ArgLocs is only needed
// here so the convention code has somewhere to record them.
SmallVector<CCValAssign, 16> ArgLocs;
const auto &TLI = *getTLI<RISCVTargetLowering>();
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
MF.getFunction().getContext());

// NOTE(review): Dispatcher presumably tracks RVV vector-register assignment
// for vector return types — confirm against RVVArgDispatcher's definition.
RVVArgDispatcher Dispatcher{&MF, &TLI,
ArrayRef(MF.getFunction().getReturnType())};

RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();

// CC_RISCV signals "could not assign" by returning true; any such failure
// means the return value does not fit in the return registers.
for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
MVT VT = MVT::getVT(Outs[I].Ty);
if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, I, VT, VT, CCValAssign::Full,
Outs[I].Flags[0], CCInfo, /*IsFixed=*/true,
/*isRet=*/true, nullptr, TLI, Dispatcher))
return false;
}
return true;
}

/// If there are varargs that were passed in a0-a7, the data in those registers
/// must be copied to the varargs save area on the stack.
void RISCVCallLowering::saveVarArgRegisters(
Expand Down Expand Up @@ -498,24 +520,26 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<ArrayRef<Register>> VRegs,
FunctionLoweringInfo &FLI) const {
// Early exit if there are no arguments. varargs are not part of F.args() but
// must be lowered.
if (F.arg_empty() && !F.isVarArg())
return true;
MachineFunction &MF = MIRBuilder.getMF();

const RISCVSubtarget &Subtarget =
MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
for (auto &Arg : F.args()) {
if (!isSupportedArgumentType(Arg.getType(), Subtarget,
/*IsLowerArgs=*/true))
return false;
}

MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
const DataLayout &DL = MF.getDataLayout();
CallingConv::ID CC = F.getCallingConv();

SmallVector<ArgInfo, 32> SplitArgInfos;

// Insert the hidden sret parameter if the return value won't fit in the
// return registers.
if (!FLI.CanLowerReturn)
insertSRetIncomingArgument(F, SplitArgInfos, FLI.DemoteRegister, MRI, DL);

SmallVector<Type *, 4> TypeList;
unsigned Index = 0;
for (auto &Arg : F.args()) {
Expand Down Expand Up @@ -625,21 +649,24 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
*Subtarget.getRegBankInfo(), *Call,
Call->getDesc(), Call->getOperand(0), 0);

if (Info.OrigRet.Ty->isVoidTy())
return true;
if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);

SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);
RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(),
ArrayRef(F.getReturnType())};
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/true, RetDispatcher);
RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
MIRBuilder, CC, Info.IsVarArg))
return false;
}

RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(),
ArrayRef(F.getReturnType())};
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/true, RetDispatcher);
RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
MIRBuilder, CC, Info.IsVarArg))
return false;
if (!Info.CanLowerReturn)
insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
Info.DemoteRegister, Info.DemoteStackIndex);

return true;
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ class RISCVCallLowering : public CallLowering {
ArrayRef<Register> VRegs,
FunctionLoweringInfo &FLI) const override;

bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
SmallVectorImpl<BaseArgInfo> &Outs,
bool IsVarArg) const override;

bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs,
FunctionLoweringInfo &FLI) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,54 @@ define i32 @caller_small_struct_ret() nounwind {
ret i32 %5
}

; Check return of >2x xlen scalars

; fp128 (s128) cannot fit in two XLEN registers on RV32, so the new
; canLowerReturn demotes the return: the callee receives a hidden sret
; pointer in x10 and stores the value through it. The CHECK lines pin the
; expected GlobalISel output for this lowering.
define fp128 @callee_large_scalar_ret() nounwind {
; RV32I-LABEL: name: callee_large_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
; RV32I-NEXT: [[C:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000
; RV32I-NEXT: G_STORE [[C]](s128), [[COPY]](p0) :: (store (s128))
; RV32I-NEXT: PseudoRET
ret fp128 0xL00000000000000007FFF000000000000
}

; Caller side of the sret demotion: a stack slot is created for the fp128
; result, its address is passed in x10, and the result is loaded back after
; the call. The loaded s128 is intentionally unused — the test only checks
; the call/sret lowering, and this function returns void.
define void @caller_large_scalar_ret() nounwind {
; ILP32-LABEL: name: caller_large_scalar_ret
; ILP32: bb.1 (%ir-block.0):
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalar_ret, csr_ilp32_lp64, implicit-def $x1, implicit $x10
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s128) from %stack.0)
; ILP32-NEXT: PseudoRET
;
; ILP32F-LABEL: name: caller_large_scalar_ret
; ILP32F: bb.1 (%ir-block.0):
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalar_ret, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s128) from %stack.0)
; ILP32F-NEXT: PseudoRET
;
; ILP32D-LABEL: name: caller_large_scalar_ret
; ILP32D: bb.1 (%ir-block.0):
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalar_ret, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s128) from %stack.0)
; ILP32D-NEXT: PseudoRET
%1 = call fp128 @callee_large_scalar_ret()
ret void
}

; Check return of >2x xlen structs

%struct.large = type { i32, i32, i32, i32 }
Expand Down Expand Up @@ -1033,3 +1081,106 @@ define i32 @caller_large_struct_ret() nounwind {
%5 = add i32 %2, %4
ret i32 %5
}

%struct.large2 = type { i32, float, i16, i32 }

; A 16-byte struct with mixed field types (i32, float, i16, i32) is also
; returned via a hidden sret pointer in x10. The CHECK lines show each field
; stored individually at byte offsets 0/4/8/12 from the incoming pointer.
define %struct.large2 @callee_large_struct_ret2() nounwind {
; RV32I-LABEL: name: callee_large_struct_ret2
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
; RV32I-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; RV32I-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
; RV32I-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; RV32I-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32), align 8)
; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s32)
; RV32I-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32))
; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; RV32I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s32)
; RV32I-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16), align 8)
; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; RV32I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s32)
; RV32I-NEXT: G_STORE [[C3]](s32), [[PTR_ADD2]](p0) :: (store (s32))
; RV32I-NEXT: PseudoRET
%a = insertvalue %struct.large2 poison, i32 1, 0
%b = insertvalue %struct.large2 %a, float 2.0, 1
%c = insertvalue %struct.large2 %b, i16 3, 2
%d = insertvalue %struct.large2 %c, i32 4, 3
ret %struct.large2 %d
}

; Caller side for the struct case: the sret slot address goes out in x10 and
; all four fields are loaded back, with fields 0 and 3 added for the result.
; NOTE(review): both the IR and the CHECK lines call @callee_large_struct_ret
; (defined elsewhere in this file), not @callee_large_struct_ret2 — the two
; structs have the same size/offsets, but confirm the target is intentional.
define i32 @caller_large_struct_ret2() nounwind {
; ILP32-LABEL: name: caller_large_struct_ret2
; ILP32: bb.1 (%ir-block.0):
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_struct_ret, csr_ilp32_lp64, implicit-def $x1, implicit $x10
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %stack.0, align 8)
; ILP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
; ILP32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %stack.0)
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; ILP32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
; ILP32-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from %stack.0, align 8)
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; ILP32-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
; ILP32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %stack.0)
; ILP32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD3]]
; ILP32-NEXT: $x10 = COPY [[ADD]](s32)
; ILP32-NEXT: PseudoRET implicit $x10
;
; ILP32F-LABEL: name: caller_large_struct_ret2
; ILP32F: bb.1 (%ir-block.0):
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_struct_ret, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %stack.0, align 8)
; ILP32F-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
; ILP32F-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %stack.0)
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; ILP32F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
; ILP32F-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from %stack.0, align 8)
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; ILP32F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
; ILP32F-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %stack.0)
; ILP32F-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD3]]
; ILP32F-NEXT: $x10 = COPY [[ADD]](s32)
; ILP32F-NEXT: PseudoRET implicit $x10
;
; ILP32D-LABEL: name: caller_large_struct_ret2
; ILP32D: bb.1 (%ir-block.0):
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_struct_ret, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %stack.0, align 8)
; ILP32D-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32)
; ILP32D-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %stack.0)
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; ILP32D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
; ILP32D-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from %stack.0, align 8)
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; ILP32D-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
; ILP32D-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %stack.0)
; ILP32D-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD3]]
; ILP32D-NEXT: $x10 = COPY [[ADD]](s32)
; ILP32D-NEXT: PseudoRET implicit $x10
%1 = call %struct.large2 @callee_large_struct_ret()
%2 = extractvalue %struct.large2 %1, 0
%3 = extractvalue %struct.large2 %1, 3
%4 = add i32 %2, %3
ret i32 %4
}
Loading

0 comments on commit 735d136

Please sign in to comment.