From a9198b8a06ff0393e0b1cbfcecdc876925c0d239 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 2 Jul 2024 23:36:35 +0800 Subject: [PATCH 1/4] [SDAG] Add pre-commit tests from PR96366. NFC. --- llvm/test/CodeGen/AArch64/pr96366.ll | 23 +++++++++++++++++++++++ llvm/test/CodeGen/RISCV/pr96366.ll | 25 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/pr96366.ll create mode 100644 llvm/test/CodeGen/RISCV/pr96366.ll diff --git a/llvm/test/CodeGen/AArch64/pr96366.ll b/llvm/test/CodeGen/AArch64/pr96366.ll new file mode 100644 index 00000000000000..fd13197ca396f5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr96366.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s + +declare void @mumble(i32) + +define i32 @f(i32 %0) nounwind { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: bl mumble +; CHECK-NEXT: mov w0, #4 // =0x4 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %2 = sub nuw i32 0, %0 + call void @mumble(i32 %2) + %3 = sub i32 1, %0 + %4 = sub i32 3, %0 + %5 = mul i32 %0, 1 + %6 = add i32 %3, %5 + %7 = add i32 %6, %4 + ret i32 %7 +} diff --git a/llvm/test/CodeGen/RISCV/pr96366.ll b/llvm/test/CodeGen/RISCV/pr96366.ll new file mode 100644 index 00000000000000..712d07b3599e0d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr96366.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +declare void @mumble(i32) + +define i32 @f(i32 %0) nounwind { +; CHECK-LABEL: f: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: call mumble +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %2 = sub nuw i32 0, %0 + call void @mumble(i32 %2) + %3 = sub i32 1, %0 + %4 = sub i32 3, %0 + %5 = mul i32 %0, 1 + %6 = add i32 %3, %5 + %7 = add i32 %6, %4 + ret i32 %7 +} From 10d216a51ed8eff6a9c57f3a5907853407090aa4 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 2 Jul 2024 23:44:16 +0800 Subject: [PATCH 2/4] [SDAG] Intersect poison-generating flags after CSE --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + llvm/test/CodeGen/AArch64/pr96366.ll | 10 ++++++---- llvm/test/CodeGen/RISCV/pr96366.ll | 6 +++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bc16f885f6a046..96242305e9eaba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1239,6 +1239,7 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { // If there was already an existing matching node, use ReplaceAllUsesWith // to replace the dead one with the existing one. This can cause // recursive merging of other unrelated nodes down the line. + Existing->intersectFlagsWith(N->getFlags()); ReplaceAllUsesWith(N, Existing); // N is now dead. Inform the listeners and delete it. diff --git a/llvm/test/CodeGen/AArch64/pr96366.ll b/llvm/test/CodeGen/AArch64/pr96366.ll index fd13197ca396f5..392b7b66b0bb15 100644 --- a/llvm/test/CodeGen/AArch64/pr96366.ll +++ b/llvm/test/CodeGen/AArch64/pr96366.ll @@ -6,11 +6,13 @@ declare void @mumble(i32) define i32 @f(i32 %0) nounwind { ; CHECK-LABEL: f: ; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: neg w0, w0 ; CHECK-NEXT: bl mumble -; CHECK-NEXT: mov w0, #4 // =0x4 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: mov w8, #4 // =0x4 +; CHECK-NEXT: sub w0, w8, w19 +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %2 = sub nuw i32 0, %0 call void @mumble(i32 %2) diff --git a/llvm/test/CodeGen/RISCV/pr96366.ll b/llvm/test/CodeGen/RISCV/pr96366.ll index 712d07b3599e0d..05563d7bc14af0 100644 --- a/llvm/test/CodeGen/RISCV/pr96366.ll +++ b/llvm/test/CodeGen/RISCV/pr96366.ll @@ -8,10 +8,14 @@ define i32 @f(i32 %0) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: negw a0, a0 ; CHECK-NEXT: call mumble ; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: subw a0, a0, s0 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %2 = sub nuw i32 0, %0 From 1108057d2c990ebe68fa7afe87bcb5953a289da1 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 3 Jul 2024 01:19:08 +0800 Subject: [PATCH 3/4] [SDAG] Address review comments. NFC. --- llvm/test/CodeGen/AArch64/pr96366.ll | 22 +++++++++++----------- llvm/test/CodeGen/RISCV/pr96366.ll | 22 +++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/pr96366.ll b/llvm/test/CodeGen/AArch64/pr96366.ll index 392b7b66b0bb15..0a5d87c7f9bbf7 100644 --- a/llvm/test/CodeGen/AArch64/pr96366.ll +++ b/llvm/test/CodeGen/AArch64/pr96366.ll @@ -1,25 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s -declare void @mumble(i32) +declare void @use(i32) -define i32 @f(i32 %0) nounwind { +define i32 @f(i32 %x) nounwind { ; CHECK-LABEL: f: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: neg w0, w0 -; CHECK-NEXT: bl mumble +; CHECK-NEXT: bl use ; CHECK-NEXT: mov w8, #4 // =0x4 ; CHECK-NEXT: sub w0, w8, w19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret - %2 = sub nuw i32 0, %0 - call void @mumble(i32 %2) - %3 = sub i32 1, %0 - %4 = sub i32 3, %0 - %5 = mul i32 %0, 1 - %6 = add i32 %3, %5 - %7 = add i32 %6, %4 - ret i32 %7 + %sub1 = sub nuw i32 0, %x + call void @use(i32 %sub1) + %sub2 = sub i32 1, %x + %sub3 = sub i32 3, %x + %mul = mul i32 %x, 1 + %add1 = add i32 %sub2, %mul + %add2 = add i32 %add1, %sub3 + ret i32 %add2 } diff --git a/llvm/test/CodeGen/RISCV/pr96366.ll b/llvm/test/CodeGen/RISCV/pr96366.ll index 05563d7bc14af0..8c6fd5bfb6cb05 100644 --- a/llvm/test/CodeGen/RISCV/pr96366.ll +++ b/llvm/test/CodeGen/RISCV/pr96366.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=riscv64 | FileCheck %s -declare void @mumble(i32) +declare void @use(i32) -define i32 @f(i32 %0) nounwind { +define i32 @f(i32 %x) nounwind { ; CHECK-LABEL: f: ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 @@ -11,19 +11,19 @@ define i32 @f(i32 %0) nounwind { ; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: negw a0, a0 -; CHECK-NEXT: call mumble +; CHECK-NEXT: call use ; CHECK-NEXT: li a0, 4 ; CHECK-NEXT: subw a0, a0, s0 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret - %2 = sub nuw i32 0, %0 - call void @mumble(i32 %2) - %3 = sub i32 1, %0 - %4 = sub i32 3, %0 - %5 = mul i32 %0, 1 - %6 = add i32 %3, %5 - %7 = add i32 %6, %4 - ret i32 %7 + %sub1 = sub nuw i32 0, %x + call void @use(i32 %sub1) + %sub2 = sub i32 1, %x + %sub3 = sub i32 3, %x + %mul = mul i32 %x, 1 + %add1 = add i32 %sub2, %mul + %add2 = add i32 %add1, %sub3 + ret i32 %add2 } From 8bb2460fbe5f25dfc6d9c7717bbb00c7928978d3 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 3 Jul 2024 16:06:48 +0800 Subject: [PATCH 4/4] [SDAG] Fix tests. NFC. --- llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll index 8b1a6878136a98..ddb635cabbab15 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll @@ -90,7 +90,7 @@ define float @test2(float %arg, float %arg1) { ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1120534528 ; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_1]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = nsz contract nofpexcept V_FMAC_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_1]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_ADD_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_RCP_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, 0, implicit $mode, implicit $exec