Skip to content

Commit

Permalink
[AMDGPU] add missing checks in processBaseWithConstOffset
Browse files Browse the repository at this point in the history
  • Loading branch information
tgymnich committed Aug 8, 2024
1 parent 59531cf commit eeb6de6
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 0 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2117,6 +2117,9 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base
BaseLo = *Src0;
}

if (!BaseLo.isReg())
return;

Src0 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src0);
Src1 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src1);

Expand All @@ -2129,6 +2132,9 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base
uint64_t Offset1 = Src1->getImm();
BaseHi = *Src0;

if (!BaseHi.isReg())
return;

Addr.Base.LoReg = BaseLo.getReg();
Addr.Base.HiReg = BaseHi.getReg();
Addr.Base.LoSubReg = BaseLo.getSubReg();
Expand Down
95 changes: 95 additions & 0 deletions llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2562,4 +2562,99 @@ entry:
ret void
}

; Regression test for the operand checks in
; SILoadStoreOptimizer::processBaseWithConstOffset.
;
; The kernel loads one byte at a constant offset of -1 from a generic pointer
; obtained by casting the addrspace(5) null pointer. In the expected output the
; low half of the 64-bit address add has two immediate operands (0 and -1), so
; that half of the base is not a register.
; NOTE(review): presumably this is the case that made the optimizer call
; getReg() on an immediate operand before the isReg() checks were added; confirm
; against the pass source.
define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
; GFX8-LABEL: negativeoffsetnullptr:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dword s1, s[2:3], 0xec
; GFX8-NEXT: s_add_u32 s0, 0, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_addc_u32 s1, s1, -1
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_mov_b64 s[0:1], 0
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
; GFX8-NEXT: .LBB8_1: ; %branch
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX8-NEXT: s_and_b64 s[2:3], exec, vcc
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX8-NEXT: s_cbranch_execnz .LBB8_1
; GFX8-NEXT: ; %bb.2: ; %end
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: negativeoffsetnullptr:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, -1, 0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_mov_b64 s[0:1], 0
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
; GFX9-NEXT: .LBB8_1: ; %branch
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_and_b64 s[2:3], exec, vcc
; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX9-NEXT: s_cbranch_execnz .LBB8_1
; GFX9-NEXT: ; %bb.2: ; %end
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: negativeoffsetnullptr:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX10-NEXT: s_add_u32 s0, 0, -1
; GFX10-NEXT: s_addc_u32 s1, s1, -1
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: s_mov_b32 s0, 0
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX10-NEXT: .LBB8_1: ; %branch
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_and_b32 s1, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s0, s1, s0
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_cbranch_execnz .LBB8_1
; GFX10-NEXT: ; %bb.2: ; %end
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: negativeoffsetnullptr:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX11-NEXT: v_add_co_u32 v0, s0, -1, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: flat_load_u8 v0, v[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX11-NEXT: .LBB8_1: ; %branch
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_and_b32 s1, exec_lo, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s1, s0
; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_cbranch_execnz .LBB8_1
; GFX11-NEXT: ; %bb.2: ; %end
; GFX11-NEXT: s_endpgm
entry:
; The condition is the constant false, so the select always yields its second
; operand: the addrspace(5) null pointer cast to a generic pointer. %buffer is
; never the selected value.
%null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) null to ptr)
; Constant byte offset of -1 from that pointer; this is the address add that
; appears with immediate operands in the expected output above.
%gep = getelementptr i8, ptr %null, i64 -1
%ld = load i8, ptr %gep
%cmp = icmp eq i8 %ld, 0
br label %branch

; Self-loop: control stays in %branch until %cmp is true. %cmp is computed once
; in %entry, so the loaded value is used after the loop header.
branch:
br i1 %cmp, label %end, label %branch

end:
ret void
}


attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,33 @@ body: |
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
%15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
...

---
# Regression test for SILoadStoreOptimizer::processBaseWithConstOffset.
# The low-half add of the 64-bit address (V_ADD_CO_U32_e64 -1, 0, 0) has
# immediates for both sources, so neither operand can serve as a base register.
# The check lines below require that both halves of the add are still present
# in the output with the same immediate operands.
# NOTE(review): presumably the pass previously reached getReg() on the immediate
# operand for this input; confirm against the pass source.
# GCN-LABEL: name: negative_offset_nullptr
# GCN: V_ADD_CO_U32_e64 -1, 0, 0
# GCN: V_ADDC_U32_e64 -1, %{{[0-9]+}}, %{{[0-9]+}}, 0

name: negative_offset_nullptr
tracksRegLiveness: true
body: |
bb.0.entry:
%15:sreg_64 = S_MOV_B64 $src_private_base
%17:sreg_32 = S_MOV_B32 0
%18:sreg_64 = REG_SEQUENCE %17, %subreg.sub0, %15.sub1, %subreg.sub1
%21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 -1, 0, 0, implicit $exec
%28:vgpr_32 = COPY %18.sub1
%25:vgpr_32, %26:sreg_64 = V_ADDC_U32_e64 -1, %28, %22, 0, implicit $exec
%29:vreg_64 = REG_SEQUENCE %21, %subreg.sub0, %25, %subreg.sub1
%30:vgpr_32 = FLAT_LOAD_UBYTE %29, 0, 0, implicit $exec, implicit $flat_scr
%32:sreg_64 = V_CMP_EQ_U16_e64 %30, 0, implicit $exec
%14:sreg_64 = S_MOV_B64 0
bb.1:
%1:sreg_64 = PHI %14, %bb.0, %2, %bb.1
%2:sreg_64 = SI_IF_BREAK %32, %1, implicit-def dead $scc
SI_LOOP %2, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.2
bb.2:
SI_END_CF %2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_ENDPGM 0

0 comments on commit eeb6de6

Please sign in to comment.