Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Add tests for atomicrmw handling of new metadata #89248

Merged
merged 5 commits into from
Apr 19, 2024

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Apr 18, 2024

Add baseline tests which should comprehensively test the new atomic metadata. Test codegen / expansion, and preservation in a few transforms.

New metadata defined in #85052

Add baseline tests which should comprehensively test the new
atomic metadata. Test codegen / expansion, and preservation
in a few transforms.

New metadata defined in llvm#89248
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 18, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Add baseline tests which should comprehensively test the new atomic metadata. Test codegen / expansion, and preservation in a few transforms.

New metadata defined in #85052


Patch is 1.49 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89248.diff

16 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+1744-417)
  • (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll (+1875-393)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll (+1918-463)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll (+1961-367)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+741)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+672)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+671)
  • (added) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll (+3717)
  • (added) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll (+1685)
  • (added) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-system.ll (+828)
  • (added) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-system.ll (+828)
  • (added) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll (+859)
  • (added) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll (+859)
  • (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-integer-ops-0-to-add-0.ll (+10)
  • (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll (+10)
  • (added) llvm/test/Transforms/Inline/AMDGPU/inline-atomicrmw-md-preserve.ll (+30)
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll
index 94956511c39dfb..961273468e75ff 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll
@@ -287,6 +287,72 @@ define amdgpu_gfx i32 @flat_atomic_xchg_i32_ret_offset_scalar(ptr inreg %out, i3
   ret i32 %result
 }
 
+define void @flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_swap v[0:1], v2
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_swap v[0:1], v2
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_swap v[0:1], v2 offset:16
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret void
+}
+
+define i32 @flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_swap v0, v[0:1], v2 glc
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_swap v0, v[0:1], v2 glc
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_swap v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %result = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret i32 %result
+}
+
 ; ---------------------------------------------------------------------
 ; atomicrmw xchg f32
 ; ---------------------------------------------------------------------
@@ -571,6 +637,72 @@ define amdgpu_gfx float @flat_atomic_xchg_f32_ret_offset_scalar(ptr inreg %out,
   ret float %result
 }
 
+define void @flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, float %in) {
+; GCN1-LABEL: flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_swap v[0:1], v2
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_swap v[0:1], v2
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_swap v[0:1], v2 offset:16
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr float, ptr %out, i64 4
+  %tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret void
+}
+
+define float @flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, float %in) {
+; GCN1-LABEL: flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_swap v0, v[0:1], v2 glc
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_swap v0, v[0:1], v2 glc
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_swap v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr float, ptr %out, i64 4
+  %result = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret float %result
+}
+
 ; ---------------------------------------------------------------------
 ; atomicrmw add
 ; ---------------------------------------------------------------------
@@ -855,6 +987,72 @@ define amdgpu_gfx i32 @flat_atomic_add_i32_ret_offset_scalar(ptr inreg %out, i32
   ret i32 %result
 }
 
+define void @flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_add v[0:1], v2
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_add v[0:1], v2
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_add v[0:1], v2 offset:16
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret void
+}
+
+define i32 @flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_add v0, v[0:1], v2 glc
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_add v0, v[0:1], v2 glc
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_add v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %result = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret i32 %result
+}
+
 ; ---------------------------------------------------------------------
 ; atomicrmw sub
 ; ---------------------------------------------------------------------
@@ -1139,6 +1337,72 @@ define amdgpu_gfx i32 @flat_atomic_sub_i32_ret_offset_scalar(ptr inreg %out, i32
   ret i32 %result
 }
 
+define void @flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_sub v[0:1], v2
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_sub v[0:1], v2
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_sub v[0:1], v2 offset:16
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret void
+}
+
+define i32 @flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_sub v0, v[0:1], v2 glc
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_sub v0, v[0:1], v2 glc
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_sub v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %result = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret i32 %result
+}
+
 ; ---------------------------------------------------------------------
 ; atomicrmw and
 ; ---------------------------------------------------------------------
@@ -1423,61 +1687,127 @@ define amdgpu_gfx i32 @flat_atomic_and_i32_ret_offset_scalar(ptr inreg %out, i32
   ret i32 %result
 }
 
-; ---------------------------------------------------------------------
-; atomicrmw nand
-; ---------------------------------------------------------------------
-
-define void @flat_atomic_nand_i32_noret(ptr %ptr, i32 %in) {
-; GCN1-LABEL: flat_atomic_nand_i32_noret:
+define void @flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access:
 ; GCN1:       ; %bb.0:
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    flat_load_dword v4, v[0:1]
-; GCN1-NEXT:    s_mov_b64 s[4:5], 0
-; GCN1-NEXT:  .LBB40_1: ; %atomicrmw.start
-; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_and_b32_e32 v3, v4, v2
-; GCN1-NEXT:    v_not_b32_e32 v3, v3
-; GCN1-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_and v[0:1], v2
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    buffer_wbinvl1_vol
-; GCN1-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
-; GCN1-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GCN1-NEXT:    v_mov_b32_e32 v4, v3
-; GCN1-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GCN1-NEXT:    s_cbranch_execnz .LBB40_1
-; GCN1-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GCN1-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN1-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GCN2-LABEL: flat_atomic_nand_i32_noret:
+; GCN2-LABEL: flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access:
 ; GCN2:       ; %bb.0:
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    flat_load_dword v4, v[0:1]
-; GCN2-NEXT:    s_mov_b64 s[4:5], 0
-; GCN2-NEXT:  .LBB40_1: ; %atomicrmw.start
-; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_and_b32_e32 v3, v4, v2
-; GCN2-NEXT:    v_not_b32_e32 v3, v3
-; GCN2-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_and v[0:1], v2
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    buffer_wbinvl1_vol
-; GCN2-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
-; GCN2-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GCN2-NEXT:    v_mov_b32_e32 v4, v3
-; GCN2-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GCN2-NEXT:    s_cbranch_execnz .LBB40_1
-; GCN2-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GCN2-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN2-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GCN3-LABEL: flat_atomic_nand_i32_noret:
+; GCN3-LABEL: flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_and v[0:1], v2 offset:16
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %tmp0 = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret void
+}
+
+define i32 @flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access(ptr %out, i32 %in) {
+; GCN1-LABEL: flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
+; GCN1-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN1-NEXT:    flat_atomic_and v0, v[0:1], v2 glc
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GCN2-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN2-NEXT:    flat_atomic_and v0, v[0:1], v2 glc
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory_access:
+; GCN3:       ; %bb.0:
+; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    flat_atomic_and v0, v[0:1], v2 offset:16 glc
+; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN3-NEXT:    buffer_wbinvl1_vol
+; GCN3-NEXT:    s_setpc_b64 s[30:31]
+  %gep = getelementptr i32, ptr %out, i64 4
+  %result = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory.access !0
+  ret i32 %result
+}
+
+; ---------------------------------------------------------------------
+; atomicrmw nand
+; ---------------------------------------------------------------------
+
+define void @flat_atomic_nand_i32_noret(ptr %ptr, i32 %in) {
+; GCN1-LABEL: flat_atomic_nand_i32_noret:
+; GCN1:       ; %bb.0:
+; GCN1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    flat_load_dword v4, v[0:1]
+; GCN1-NEXT:    s_mov_b64 s[4:5], 0
+; GCN1-NEXT:  .LBB50_1: ; %atomicrmw.start
+; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    v_and_b32_e32 v3, v4, v2
+; GCN1-NEXT:    v_not_b32_e32 v3, v3
+; GCN1-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN1-NEXT:    buffer_wbinvl1_vol
+; GCN1-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
+; GCN1-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN1-NEXT:    v_mov_b32_e32 v4, v3
+; GCN1-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GCN1-NEXT:    s_cbranch_execnz .LBB50_1
+; GCN1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GCN1-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN2-LABEL: flat_atomic_nand_i32_noret:
+; GCN2:       ; %bb.0:
+; GCN2-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    flat_load_dword v4, v[0:1]
+; GCN2-NEXT:    s_mov_b64 s[4:5], 0
+; GCN2-NEXT:  .LBB50_1: ; %atomicrmw.start
+; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    v_and_b32_e32 v3, v4, v2
+; GCN2-NEXT:    v_not_b32_e32 v3, v3
+; GCN2-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
+; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN2-NEXT:    buffer_wbinvl1_vol
+; GCN2-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
+; GCN2-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN2-NEXT:    v_mov_b32_e32 v4, v3
+; GCN2-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GCN2-NEXT:    s_cbranch_execnz .LBB50_1
+; GCN2-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GCN2-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN2-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN3-LABEL: flat_atomic_nand_i32_noret:
 ; GCN3:       ; %bb.0:
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    flat_load_dword v4, v[0:1]
 ; GCN3-NEXT:    s_mov_b64 s[4:5], 0
-; GCN3-NEXT:  .LBB40_1: ; %atomicrmw.start
+; GCN3-NEXT:  .LBB50_1: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_and_b32_e32 v3, v4, v2
@@ -1489,7 +1819,7 @@ define void @flat_atomic_nand_i32_noret(ptr %ptr, i32 %in) {
 ; GCN3-NEXT...
[truncated]

@arsenm arsenm force-pushed the amdgpu-new-atomicrmw-metadata-baseline-tests branch from 677250e to ee3a6cf Compare April 18, 2024 15:21
@arsenm arsenm merged commit f433c3b into llvm:main Apr 19, 2024
3 of 4 checks passed
@arsenm arsenm deleted the amdgpu-new-atomicrmw-metadata-baseline-tests branch April 19, 2024 22:43
aniplcc pushed a commit to aniplcc/llvm-project that referenced this pull request Apr 21, 2024
Add baseline tests which should comprehensively test the new atomic
metadata. Test codegen / expansion, and preservation in a few
transforms.

New metadata defined in llvm#85052
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants