diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index b39fbdc26795c9..ddce80b2ae129e 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2117,6 +2117,9 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base
     BaseLo = *Src0;
   }
 
+  if (!BaseLo.isReg())
+    return;
+
   Src0 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src0);
   Src1 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src1);
 
@@ -2129,6 +2132,9 @@
   uint64_t Offset1 = Src1->getImm();
   BaseHi = *Src0;
 
+  if (!BaseHi.isReg())
+    return;
+
   Addr.Base.LoReg = BaseLo.getReg();
   Addr.Base.HiReg = BaseHi.getReg();
   Addr.Base.LoSubReg = BaseLo.getSubReg();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index cf7efed46cef55..ad1f790457de97 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -2562,4 +2562,99 @@ entry:
   ret void
 }
 
+define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
+; GFX8-LABEL: negativeoffsetnullptr:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dword s1, s[2:3], 0xec
+; GFX8-NEXT:    s_add_u32 s0, 0, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_addc_u32 s1, s1, -1
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
+; GFX8-NEXT:    s_mov_b64 s[0:1], 0
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX8-NEXT:  .LBB8_1: ; %branch
+; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT:    s_and_b64 s[2:3], exec, vcc
+; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX8-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX8-NEXT:  ; %bb.2: ; %end
+; GFX8-NEXT:    s_endpgm
+;
+; GFX9-LABEL: negativeoffsetnullptr:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, -1, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
+; GFX9-NEXT:    s_mov_b64 s[0:1], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX9-NEXT:  .LBB8_1: ; %branch
+; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-NEXT:    s_and_b64 s[2:3], exec, vcc
+; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX9-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX9-NEXT:  ; %bb.2: ; %end
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: negativeoffsetnullptr:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX10-NEXT:    s_add_u32 s0, 0, -1
+; GFX10-NEXT:    s_addc_u32 s1, s1, -1
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-NEXT:    s_mov_b32 s0, 0
+; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
+; GFX10-NEXT:  .LBB8_1: ; %branch
+; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT:    s_and_b32 s1, exec_lo, vcc_lo
+; GFX10-NEXT:    s_or_b32 s0, s1, s0
+; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
+; GFX10-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX10-NEXT:  ; %bb.2: ; %end
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: negativeoffsetnullptr:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_mov_b64 s[0:1], src_private_base
+; GFX11-NEXT:    v_add_co_u32 v0, s0, -1, 0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-NEXT:    s_mov_b32 s0, 0
+; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
+; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
+; GFX11-NEXT:  .LBB8_1: ; %branch
+; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX11-NEXT:    s_and_b32 s1, exec_lo, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_or_b32 s0, s1, s0
+; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX11-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX11-NEXT:  ; %bb.2: ; %end
+; GFX11-NEXT:    s_endpgm
+entry:
+  %null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) null to ptr)
+  %gep = getelementptr i8, ptr %null, i64 -1
+  %ld = load i8, ptr %gep
+  %cmp = icmp eq i8 %ld, 0
+  br label %branch
+
+branch:
+  br i1 %cmp, label %end, label %branch
+
+end:
+  ret void
+}
+
 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 6dda1fe1f39da5..aa32e21b169394 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -447,3 +447,22 @@
     %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
     %15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
 ...
+
+---
+# GCN-LABEL: name: negative_offset_nullptr
+# GCN: V_ADD_CO_U32_e64 -1, 0, 0
+# GCN: V_ADDC_U32_e64 -1, %{{[0-9]+}}, %{{[0-9]+}}, 0
+
+name: negative_offset_nullptr
+body: |
+  bb.0:
+    %0:sreg_64 = S_MOV_B64 $src_private_base
+    %1:sreg_32 = S_MOV_B32 0
+    %2:sreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0.sub1, %subreg.sub1
+    %3:vgpr_32, %4:sreg_64_xexec = V_ADD_CO_U32_e64 -1, 0, 0, implicit $exec
+    %5:vgpr_32 = COPY %2.sub1
+    %6:vgpr_32, %7:sreg_64 = V_ADDC_U32_e64 -1, %5, %4, 0, implicit $exec
+    %8:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %6, %subreg.sub1
+    %9:vgpr_32 = FLAT_LOAD_UBYTE %8, 0, 0, implicit $exec, implicit $flat_scr
+    S_ENDPGM 0, implicit %9
+...
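The failure mode this patch guards against is easy to see in isolation. `processBaseWithConstOffset` pulls the two halves of a 64-bit base address out of an add/addc pair, and `MachineOperand::getReg()` asserts when the operand it is called on is not actually a register. When a null pointer produced by `addrspacecast` is folded, the low half of the base becomes the immediate `0` rather than a register (visible as `V_ADD_CO_U32_e64 -1, 0, 0` in the .mir test), so the pre-patch code tripped that assertion. Below is a minimal self-contained C++ sketch of the guard pattern; `Operand` and `matchBasePair` are illustrative stand-ins, not LLVM's real `MachineOperand` type or the pass's actual API.

```cpp
// Minimal sketch of the guard added by the patch, assuming a tagged-union
// operand where getReg() is only valid for register operands (this mirrors
// MachineOperand's behavior, but the types here are illustrative only).
#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>
#include <utility>

struct Operand {
  enum Kind { Reg, Imm } K;
  uint64_t Val;
  bool isReg() const { return K == Reg; }
  unsigned getReg() const {
    // The pre-patch crash site: calling getReg() on an immediate asserts.
    assert(K == Reg && "not a register operand");
    return static_cast<unsigned>(Val);
  }
};

// Mirrors the patched control flow: bail out instead of asserting when a
// folded base half (e.g. a null pointer lowered to immediate 0) is not a
// register, so the optimizer simply skips this merge candidate.
std::optional<std::pair<unsigned, unsigned>>
matchBasePair(const Operand &BaseLo, const Operand &BaseHi) {
  if (!BaseLo.isReg())
    return std::nullopt;
  if (!BaseHi.isReg())
    return std::nullopt;
  return std::make_pair(BaseLo.getReg(), BaseHi.getReg());
}

int main() {
  Operand Lo{Operand::Imm, 0}; // low half folded from addrspacecast null
  Operand Hi{Operand::Reg, 5}; // high half still held in a register
  std::cout << (matchBasePair(Lo, Hi) ? "merged base\n" : "skipped candidate\n");
}
```

The two tests pin the fix at different levels: the .ll test reproduces the crash end-to-end (a flat load at offset -1 from `addrspacecast (ptr addrspace(5) null to ptr)`), while the .mir test targets the pass directly, with the `# GCN` checks confirming that the add/addc pair with the immediate base half is left untouched rather than promoted.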