AMDGPU/GlobalISel: RegBankLegalize rules for DS barrier arrive atomics (#192767)

This commit is contained in:
vangthao95
2026-04-21 14:53:12 -07:00
committed by GitHub
parent 5ee4c51c1a
commit 92958a0631
3 changed files with 119 additions and 15 deletions

View File

@@ -1905,6 +1905,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
.Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
addRulesForIOpcs({amdgcn_ds_atomic_async_barrier_arrive_b64})
.Any({{}, {{}, {IntrId, VgprP3}}});
addRulesForIOpcs({amdgcn_ds_atomic_barrier_arrive_rtn_b64}, Standard)
.Div(S64, {{Vgpr64}, {IntrId, VgprP3, Vgpr64}});
addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
Standard)
.Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})

View File

@@ -1,21 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
declare void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3))
; GCN-LABEL: {{^}}test_ds_atomic_async_barrier_arrive_b64:
; GCN: ds_atomic_async_barrier_arrive_b64 v0{{$}}
; Basic case: the addrspace(3) pointer argument is passed unchanged to the
; async barrier-arrive intrinsic. The assertions below expect the instruction
; to take its address directly from v0 with no offset.
define void @test_ds_atomic_async_barrier_arrive_b64(ptr addrspace(3) %bar) {
; GCN-LABEL: test_ds_atomic_async_barrier_arrive_b64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
; GCN-NEXT: ds_atomic_async_barrier_arrive_b64 v0
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
; GCN-NEXT: s_set_pc_i64 s[30:31]
entry:
call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %bar)
ret void
}
; GCN-LABEL: {{^}}test_ds_atomic_async_barrier_arrive_b64_off:
; GCN: ds_atomic_async_barrier_arrive_b64 v0 offset:8184{{$}}
; Offset case: the pointer is advanced by 1023 i64 elements
; (1023 * 8 = 8184 bytes) before the intrinsic call. The assertions below
; expect that constant to appear as the instruction's offset:8184 field
; instead of a separate address add.
define void @test_ds_atomic_async_barrier_arrive_b64_off(ptr addrspace(3) %in) {
; GCN-LABEL: test_ds_atomic_async_barrier_arrive_b64_off:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
; GCN-NEXT: ds_atomic_async_barrier_arrive_b64 v0 offset:8184
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
; GCN-NEXT: s_set_pc_i64 s[30:31]
entry:
%bar = getelementptr i64, ptr addrspace(3) %in, i32 1023
call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %bar)
ret void
}
; Scalar-input case: an amdgpu_ps function whose pointer argument is marked
; inreg. The assertions below expect the pointer to be copied from s0 into v0
; (v_mov_b32) before the DS instruction consumes it.
define amdgpu_ps void @test_ds_atomic_async_barrier_arrive_b64_s(ptr addrspace(3) inreg %bar) {
; GCN-LABEL: test_ds_atomic_async_barrier_arrive_b64_s:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
; GCN-NEXT: ds_atomic_async_barrier_arrive_b64 v0
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
; GCN-NEXT: s_endpgm
entry:
call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %bar)
ret void
}

View File

@@ -1,27 +1,98 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GISEL %s
declare i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3), i64)
; GCN-LABEL: {{^}}test_ds_atomic_barrier_arrive_rtn_b64:
; GCN: ds_atomic_barrier_arrive_rtn_b64 v[{{[0-9:]+}}], v2, v[0:1]{{$}}
; GCN: s_wait_dscnt 0x0
; GCN: flat_store_b64
; Basic returning case: calls the rtn.b64 barrier-arrive intrinsic with the
; %bar pointer and the i64 %data operand, then stores the returned i64 to
; %out. Both check prefixes below expect the DS instruction to read the
; address from v2 and the data from v[0:1], and expect an s_wait_dscnt 0x0
; before the flat store of the result.
define void @test_ds_atomic_barrier_arrive_rtn_b64(i64 %data, ptr addrspace(3) %bar, ptr %out) {
; SDAG-LABEL: test_ds_atomic_barrier_arrive_rtn_b64:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; SDAG-NEXT: s_wait_kmcnt 0x0
; SDAG-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
; SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; SDAG-NEXT: s_wait_dscnt 0x0
; SDAG-NEXT: flat_store_b64 v[4:5], v[0:1]
; SDAG-NEXT: s_wait_dscnt 0x0
; SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GISEL-LABEL: test_ds_atomic_barrier_arrive_rtn_b64:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
; GISEL-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
; GISEL-NEXT: s_wait_dscnt 0x0
; GISEL-NEXT: flat_store_b64 v[6:7], v[0:1]
; GISEL-NEXT: s_wait_dscnt 0x0
; GISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
%ret = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %bar, i64 %data)
store i64 %ret, ptr %out
ret void
}
; GCN-LABEL: {{^}}test_ds_atomic_barrier_arrive_rtn_b64_off:
; GCN: ds_atomic_barrier_arrive_rtn_b64 v[{{[0-9:]+}}], v0, v[{{[0-9:]+}}] offset:8184{{$}}
; GCN: s_wait_dscnt 0x0
; GCN: flat_store_b64
; Offset + immediate-data case: the pointer is advanced by 1023 i64 elements
; (1023 * 8 = 8184 bytes) and the data operand is the constant 512. Both
; check prefixes below expect the constant to be materialized as 0x200 with
; v_mov_b64 and the pointer adjustment to appear as offset:8184 on the DS
; instruction.
define void @test_ds_atomic_barrier_arrive_rtn_b64_off(ptr addrspace(3) %in, ptr %out) {
; SDAG-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_off:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; SDAG-NEXT: s_wait_kmcnt 0x0
; SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x200
; SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; SDAG-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v0, v[4:5] offset:8184
; SDAG-NEXT: s_wait_dscnt 0x0
; SDAG-NEXT: flat_store_b64 v[2:3], v[0:1]
; SDAG-NEXT: s_wait_dscnt 0x0
; SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GISEL-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_off:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GISEL-NEXT: v_mov_b64_e32 v[2:3], 0x200
; GISEL-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v0, v[2:3] offset:8184
; GISEL-NEXT: s_wait_dscnt 0x0
; GISEL-NEXT: flat_store_b64 v[4:5], v[0:1]
; GISEL-NEXT: s_wait_dscnt 0x0
; GISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
%bar = getelementptr i64, ptr addrspace(3) %in, i32 1023
%ret = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %bar, i64 512)
store i64 %ret, ptr %out
ret void
}
; Scalar-input case: an amdgpu_ps function where the pointer, the data and
; the output pointer are all marked inreg. Both check prefixes below expect
; the pointer (s0) and the data (s1, s2) to be copied into VGPRs before the DS
; instruction, and the result to be stored through an SGPR-pair base with a
; zeroed VGPR (v2) as the address operand.
define amdgpu_ps void @test_ds_atomic_barrier_arrive_rtn_b64_ss(ptr addrspace(3) inreg %bar, i64 inreg %data, ptr inreg %out) {
; SDAG-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_ss:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
; SDAG-NEXT: v_mov_b32_e32 v2, s0
; SDAG-NEXT: s_mov_b32 s5, s4
; SDAG-NEXT: s_mov_b32 s4, s3
; SDAG-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: s_wait_dscnt 0x0
; SDAG-NEXT: flat_store_b64 v2, v[0:1], s[4:5]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_ss:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GISEL-NEXT: s_mov_b32 s6, s1
; GISEL-NEXT: s_mov_b32 s7, s2
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GISEL-NEXT: s_mov_b32 s2, s3
; GISEL-NEXT: s_mov_b32 s3, s4
; GISEL-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0
; GISEL-NEXT: s_wait_dscnt 0x0
; GISEL-NEXT: flat_store_b64 v2, v[0:1], s[2:3]
; GISEL-NEXT: s_endpgm
entry:
%ret = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %bar, i64 %data)
store i64 %ret, ptr %out
ret void
}