From be4b6dca7bbbef189e7fcc52fe021b3729c79d81 Mon Sep 17 00:00:00 2001 From: Michal Paszkowski Date: Mon, 10 Jun 2024 03:51:18 -0700 Subject: [PATCH 1/2] [SPIR-V] Don't change switch condition type in CodeGen opts This change makes sure the preferred switch condition int type size remains the same throughout CodeGen optimizations. The change fixes running several OpenCL applications with -O2 or higher opt levels, and fixes Basic/stream/stream_max_stmt_exceed.cpp DPC++ E2E test with -O2. --- llvm/lib/Target/SPIRV/SPIRVISelLowering.h | 5 +++++ .../optimizations/switch-condition-type.ll | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h index 6fc200abf46279..77356b7512a739 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h @@ -68,6 +68,11 @@ class SPIRVTargetLowering : public TargetLowering { // extra instructions required to preserve validity of SPIR-V code imposed by // the standard. 
void finalizeLowering(MachineFunction &MF) const override; + + MVT getPreferredSwitchConditionType(LLVMContext &Context, + EVT ConditionVT) const override { + return ConditionVT.getSimpleVT(); + } }; } // namespace llvm diff --git a/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll b/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll new file mode 100644 index 00000000000000..054520d2021b99 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/optimizations/switch-condition-type.ll @@ -0,0 +1,18 @@ +; RUN: llc -O2 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O2 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#INT16:]] = OpTypeInt 16 0 +; CHECK: %[[#PARAM:]] = OpFunctionParameter %[[#INT16]] +; CHECK: OpSwitch %[[#PARAM]] %[[#]] 1 %[[#]] 2 %[[#]] + +define i32 @test_switch(i16 %cond) { +entry: + switch i16 %cond, label %default [ i16 1, label %case_one + i16 2, label %case_two ] +case_one: + ret i32 1 +case_two: + ret i32 2 +default: + ret i32 3 +} From 6c2d18295b42804cbbb94e8d85cb93e7cca52d1d Mon Sep 17 00:00:00 2001 From: Michal Paszkowski Date: Mon, 10 Jun 2024 07:31:31 -0700 Subject: [PATCH 2/2] [SPIR-V] Lower llvm.x.with.overflow intrinsics This patch introduces lowering for the remaining llvm.x.with.overflow intrinsics. The proposed implementation does not rely on OpIAddCarry for sadd_with_overflow and equivalent for ssub_with_overflow. Also, the patch changes the ordering of running the SPIRVPrepareFunctions pass, so that it runs after CodeGenPrepare. The changes push further the compilation of vector/scalar_access.cpp and other DPC++ E2E tests with -O2 or higher optimization levels. 
--- .../Target/SPIRV/SPIRVPrepareFunctions.cpp | 102 +++++++++++++----- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 2 +- llvm/test/CodeGen/SPIRV/assume.ll | 8 +- .../llvm-intrinsics/smul.with.overflow.ll | 54 ++++++++++ .../llvm-intrinsics/umul.with.overflow.ll | 2 +- 5 files changed, 135 insertions(+), 33 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp index 7bee87d7204ede..4c3b6a6a247373 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -342,26 +342,70 @@ static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) { FSHIntrinsic->setCalledFunction(FSHFunc); } -static void buildUMulWithOverflowFunc(Function *UMulFunc) { - // The function body is already created. - if (!UMulFunc->empty()) +static void buildArithWithOverflowFunc(Function *Func, Intrinsic::ID ID) { + if (!Func->empty()) return; - BasicBlock *EntryBB = BasicBlock::Create(UMulFunc->getParent()->getContext(), - "entry", UMulFunc); + BasicBlock *EntryBB = + BasicBlock::Create(Func->getParent()->getContext(), "entry", Func); IRBuilder<> IRB(EntryBB); - // Build the actual unsigned multiplication logic with the overflow - // indication. Do unsigned multiplication Mul = A * B. Then check - // if unsigned division Div = Mul / A is not equal to B. If so, - // then overflow has happened. - Value *Mul = IRB.CreateNUWMul(UMulFunc->getArg(0), UMulFunc->getArg(1)); - Value *Div = IRB.CreateUDiv(Mul, UMulFunc->getArg(0)); - Value *Overflow = IRB.CreateICmpNE(UMulFunc->getArg(0), Div); - - // umul.with.overflow intrinsic return a structure, where the first element - // is the multiplication result, and the second is an overflow bit. 
- Type *StructTy = UMulFunc->getReturnType(); - Value *Agg = IRB.CreateInsertValue(PoisonValue::get(StructTy), Mul, {0}); + Value *LHS = Func->getArg(0); + Value *RHS = Func->getArg(1); + + Value *Result; + Value *Overflow; + Type *StructTy = Func->getReturnType(); + + switch (ID) { + case Intrinsic::smul_with_overflow: + Result = IRB.CreateNSWMul(LHS, RHS); + Overflow = IRB.CreateICmpNE(IRB.CreateSDiv(Result, LHS), RHS); + break; + case Intrinsic::umul_with_overflow: + Result = IRB.CreateNUWMul(LHS, RHS); + Overflow = IRB.CreateICmpNE(IRB.CreateUDiv(Result, LHS), RHS); + break; + case Intrinsic::sadd_with_overflow: + // TODO: Implement using OpIAddCarry + Result = IRB.CreateNSWAdd(LHS, RHS); + // Overflow if (LHS > 0 && RHS > 0 && Result < 0) || (LHS < 0 && RHS < 0 && + // Result > 0) + Overflow = IRB.CreateOr( + IRB.CreateAnd(IRB.CreateAnd(IRB.CreateICmpSGT(LHS, IRB.getInt32(0)), + IRB.CreateICmpSGT(RHS, IRB.getInt32(0))), + IRB.CreateICmpSLT(Result, IRB.getInt32(0))), + IRB.CreateAnd(IRB.CreateAnd(IRB.CreateICmpSLT(LHS, IRB.getInt32(0)), + IRB.CreateICmpSLT(RHS, IRB.getInt32(0))), + IRB.CreateICmpSGT(Result, IRB.getInt32(0)))); + break; + case Intrinsic::uadd_with_overflow: + Result = IRB.CreateNUWAdd(LHS, RHS); + // Overflow occurs if the result is less than either of the operands. 
+ Overflow = IRB.CreateICmpULT(Result, LHS); + break; + case Intrinsic::ssub_with_overflow: + Result = IRB.CreateNSWSub(LHS, RHS); + // Overflow if (LHS < 0 && RHS > 0 && Result > 0) || (LHS > 0 && RHS < 0 && + // Result < 0) + Overflow = IRB.CreateOr( + IRB.CreateAnd(IRB.CreateAnd(IRB.CreateICmpSLT(LHS, IRB.getInt32(0)), + IRB.CreateICmpSGT(RHS, IRB.getInt32(0))), + IRB.CreateICmpSGT(Result, IRB.getInt32(0))), + IRB.CreateAnd(IRB.CreateAnd(IRB.CreateICmpSGT(LHS, IRB.getInt32(0)), + IRB.CreateICmpSLT(RHS, IRB.getInt32(0))), + IRB.CreateICmpSLT(Result, IRB.getInt32(0)))); + break; + case Intrinsic::usub_with_overflow: + Result = IRB.CreateNUWSub(LHS, RHS); + // Overflow occurs if the result is greater than the left-hand-side operand. + Overflow = IRB.CreateICmpUGT(Result, LHS); + break; + + default: + llvm_unreachable("Unsupported arithmetic with overflow intrinsic."); + } + + Value *Agg = IRB.CreateInsertValue(PoisonValue::get(StructTy), Result, {0}); Value *Res = IRB.CreateInsertValue(Agg, Overflow, {1}); IRB.CreateRet(Res); } @@ -407,18 +451,17 @@ static bool toSpvOverloadedIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID, return true; } -static void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic) { +static void lowerArithWithOverflow(IntrinsicInst *ArithIntrinsic) { // Get a separate function - otherwise, we'd have to rework the CFG of the // current one. Then simply replace the intrinsic uses with a call to the new // function. 
- Module *M = UMulIntrinsic->getModule(); - FunctionType *UMulFuncTy = UMulIntrinsic->getFunctionType(); - Type *FSHLRetTy = UMulFuncTy->getReturnType(); - const std::string FuncName = lowerLLVMIntrinsicName(UMulIntrinsic); - Function *UMulFunc = - getOrCreateFunction(M, FSHLRetTy, UMulFuncTy->params(), FuncName); - buildUMulWithOverflowFunc(UMulFunc); - UMulIntrinsic->setCalledFunction(UMulFunc); + Module *M = ArithIntrinsic->getModule(); + FunctionType *FuncTy = ArithIntrinsic->getFunctionType(); + Type *RetTy = FuncTy->getReturnType(); + const std::string FuncName = lowerLLVMIntrinsicName(ArithIntrinsic); + Function *Func = getOrCreateFunction(M, RetTy, FuncTy->params(), FuncName); + buildArithWithOverflowFunc(Func, ArithIntrinsic->getIntrinsicID()); + ArithIntrinsic->setCalledFunction(Func); } // Substitutes calls to LLVM intrinsics with either calls to SPIR-V intrinsics @@ -444,8 +487,13 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { lowerFunnelShifts(II); Changed = true; break; + case Intrinsic::sadd_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: case Intrinsic::umul_with_overflow: - lowerUMulWithOverflow(II); + case Intrinsic::usub_with_overflow: + lowerArithWithOverflow(II); Changed = true; break; case Intrinsic::assume: diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 52fc6f33b4ef14..845113dd48650e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -177,11 +177,11 @@ void SPIRVPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); addPass(createSPIRVRegularizerPass()); - addPass(createSPIRVPrepareFunctionsPass(TM)); addPass(createSPIRVStripConvergenceIntrinsicsPass()); } void SPIRVPassConfig::addISelPrepare() { + addPass(createSPIRVPrepareFunctionsPass(TM)); addPass(createSPIRVEmitIntrinsicsPass(&getTM())); 
TargetPassConfig::addISelPrepare(); } diff --git a/llvm/test/CodeGen/SPIRV/assume.ll b/llvm/test/CodeGen/SPIRV/assume.ll index fbf12ef184a891..88ff74535bde7e 100644 --- a/llvm/test/CodeGen/SPIRV/assume.ll +++ b/llvm/test/CodeGen/SPIRV/assume.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_expect_assume < %s | FileCheck --check-prefixes=EXT,CHECK %s -; RUN: llc -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_expect_assume < %s | FileCheck --check-prefixes=EXT,CHECK %s -; RUN: llc -mtriple=spirv32-unknown-unknown < %s | FileCheck --check-prefixes=NOEXT,CHECK %s -; RUN: llc -mtriple=spirv64-unknown-unknown < %s | FileCheck --check-prefixes=NOEXT,CHECK %s +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_expect_assume < %s | FileCheck --check-prefixes=EXT,CHECK %s +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_expect_assume < %s | FileCheck --check-prefixes=EXT,CHECK %s +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown < %s | FileCheck --check-prefixes=NOEXT,CHECK %s +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown < %s | FileCheck --check-prefixes=NOEXT,CHECK %s ; EXT: OpCapability ExpectAssumeKHR ; EXT-NEXT: OpExtension "SPV_KHR_expect_assume" diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll new file mode 100644 index 00000000000000..b336e22a145b49 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll @@ -0,0 +1,54 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: OpName %[[#NAME_SMUL_FUNC_8:]] "spirv.llvm_smul_with_overflow_i8" +; CHECK-SPIRV: OpName %[[#NAME_SMUL_FUNC_32:]] "spirv.llvm_smul_with_overflow_i32" +; CHECK-SPIRV: OpName %[[#NAME_SMUL_FUNC_VEC_I64:]] "spirv.llvm_smul_with_overflow_v2i64" + +define dso_local spir_func void @_Z4foo8hhPh(i8 zeroext %a, i8 zeroext %b, i8* nocapture %c) 
local_unnamed_addr { +entry: + ; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#]] %[[#NAME_SMUL_FUNC_8]] + %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %smul, 1 + %smul.value = extractvalue { i8, i1 } %smul, 0 + %storemerge = select i1 %cmp, i8 0, i8 %smul.value + store i8 %storemerge, i8* %c, align 1 + ret void +} + +define dso_local spir_func void @_Z5foo32jjPj(i32 %a, i32 %b, i32* nocapture %c) local_unnamed_addr { +entry: + ; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#]] %[[#NAME_SMUL_FUNC_32]] + %smul = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %b, i32 %a) + %smul.val = extractvalue { i32, i1 } %smul, 0 + %smul.ov = extractvalue { i32, i1 } %smul, 1 + %spec.select = select i1 %smul.ov, i32 0, i32 %smul.val + store i32 %spec.select, i32* %c, align 4 + ret void +} + +define dso_local spir_func void @smulo_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %p) nounwind { + ; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#]] %[[#NAME_SMUL_FUNC_VEC_I64]] + %smul = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> %a, <2 x i64> %b) + %smul.val = extractvalue {<2 x i64>, <2 x i1>} %smul, 0 + %smul.ov = extractvalue {<2 x i64>, <2 x i1>} %smul, 1 + %zero = alloca <2 x i64>, align 16 + %spec.select = select <2 x i1> %smul.ov, <2 x i64> , <2 x i64> %smul.val + store <2 x i64> %spec.select, <2 x i64>* %p + ret void +} + +; CHECK-SPIRV: %[[#NAME_SMUL_FUNC_8]] = OpFunction %[[#]] +; CHECK-SPIRV: %[[#VAR_A:]] = OpFunctionParameter %[[#]] +; CHECK-SPIRV: %[[#VAR_B:]] = OpFunctionParameter %[[#]] +; CHECK-SPIRV: %[[#MUL_RES:]] = OpIMul %[[#]] %[[#VAR_A]] %[[#VAR_B]] +; CHECK-SPIRV: %[[#DIV_RES:]] = OpSDiv %[[#]] %[[#MUL_RES]] %[[#VAR_A]] +; CHECK-SPIRV: %[[#CMP_RES:]] = OpINotEqual %[[#]] %[[#DIV_RES]] %[[#VAR_B]] +; CHECK-SPIRV: %[[#INSERT_RES:]] = OpCompositeInsert %[[#]] %[[#MUL_RES]] +; CHECK-SPIRV: %[[#INSERT_RES_1:]] = OpCompositeInsert %[[#]] %[[#CMP_RES]] %[[#INSERT_RES]] +; CHECK-SPIRV: 
OpReturnValue %[[#INSERT_RES_1]] + +declare { i8, i1 } @llvm.smul.with.overflow.i8(i8, i8) + +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) + +declare {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll index 406a23fa7d3df5..7a4137b875fd26 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll @@ -42,7 +42,7 @@ define dso_local spir_func void @umulo_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i6 ; CHECK-SPIRV: %[[#VAR_B:]] = OpFunctionParameter %[[#]] ; CHECK-SPIRV: %[[#MUL_RES:]] = OpIMul %[[#]] %[[#VAR_A]] %[[#VAR_B]] ; CHECK-SPIRV: %[[#DIV_RES:]] = OpUDiv %[[#]] %[[#MUL_RES]] %[[#VAR_A]] -; CHECK-SPIRV: %[[#CMP_RES:]] = OpINotEqual %[[#]] %[[#VAR_A]] %[[#DIV_RES]] +; CHECK-SPIRV: %[[#CMP_RES:]] = OpINotEqual %[[#]] %[[#DIV_RES]] %[[#VAR_B]] ; CHECK-SPIRV: %[[#INSERT_RES:]] = OpCompositeInsert %[[#]] %[[#MUL_RES]] ; CHECK-SPIRV: %[[#INSERT_RES_1:]] = OpCompositeInsert %[[#]] %[[#CMP_RES]] %[[#INSERT_RES]] ; CHECK-SPIRV: OpReturnValue %[[#INSERT_RES_1]]