AMDGPU/GlobalISel: RegBankLegalize rules for DS barrier arrive atomics (#192767)
This commit is contained in:
@@ -1905,6 +1905,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
|
||||
.Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
|
||||
.Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
|
||||
|
||||
addRulesForIOpcs({amdgcn_ds_atomic_async_barrier_arrive_b64})
|
||||
.Any({{}, {{}, {IntrId, VgprP3}}});
|
||||
|
||||
addRulesForIOpcs({amdgcn_ds_atomic_barrier_arrive_rtn_b64}, Standard)
|
||||
.Div(S64, {{Vgpr64}, {IntrId, VgprP3, Vgpr64}});
|
||||
|
||||
addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
|
||||
Standard)
|
||||
.Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
|
||||
|
||||
@@ -1,21 +1,48 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
|
||||
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
|
||||
|
||||
declare void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3))
|
||||
|
||||
; GCN-LABEL: {{^}}test_ds_atomic_async_barrier_arrive_b64:
|
||||
; GCN: ds_atomic_async_barrier_arrive_b64 v0{{$}}
|
||||
; Calls llvm.amdgcn.ds.atomic.async.barrier.arrive.b64 on a pointer passed as a
; plain (non-inreg) function argument. The assertions below expect the DS
; instruction to take its address directly from v0, with no offset operand.
define void @test_ds_atomic_async_barrier_arrive_b64(ptr addrspace(3) %bar) {
|
||||
; GCN-LABEL: test_ds_atomic_async_barrier_arrive_b64:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
|
||||
; GCN-NEXT: ds_atomic_async_barrier_arrive_b64 v0
|
||||
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
|
||||
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %bar)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_ds_atomic_async_barrier_arrive_b64_off:
|
||||
; GCN: ds_atomic_async_barrier_arrive_b64 v0 offset:8184{{$}}
|
||||
; Same intrinsic, but the address is %in advanced by 1023 i64 elements. The
; assertions below expect the resulting 8184-byte displacement (1023 * 8) to be
; carried in the instruction's offset:8184 operand, with no separate address
; computation emitted before it.
define void @test_ds_atomic_async_barrier_arrive_b64_off(ptr addrspace(3) %in) {
|
||||
; GCN-LABEL: test_ds_atomic_async_barrier_arrive_b64_off:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
|
||||
; GCN-NEXT: ds_atomic_async_barrier_arrive_b64 v0 offset:8184
|
||||
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
|
||||
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%bar = getelementptr i64, ptr addrspace(3) %in, i32 1023
|
||||
call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %bar)
|
||||
ret void
|
||||
}
|
||||
|
||||
; amdgpu_ps variant whose pointer argument is marked inreg. The assertions
; below expect the address to be copied from s0 into v0 (v_mov_b32) before the
; DS instruction consumes it.
define amdgpu_ps void @test_ds_atomic_async_barrier_arrive_b64_s(ptr addrspace(3) inreg %bar) {
|
||||
; GCN-LABEL: test_ds_atomic_async_barrier_arrive_b64_s:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
|
||||
; GCN-NEXT: ds_atomic_async_barrier_arrive_b64 v0
|
||||
; GCN-NEXT: s_wait_alu depctr_vm_vsrc(0)
|
||||
; GCN-NEXT: s_endpgm
|
||||
entry:
|
||||
call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %bar)
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -1,27 +1,98 @@
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=SDAG %s
|
||||
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GISEL %s
|
||||
|
||||
declare i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3), i64)
|
||||
|
||||
; GCN-LABEL: {{^}}test_ds_atomic_barrier_arrive_rtn_b64:
|
||||
; GCN: ds_atomic_barrier_arrive_rtn_b64 v[{{[0-9:]+}}], v2, v[0:1]{{$}}
|
||||
; GCN: s_wait_dscnt 0x0
|
||||
; GCN: flat_store_b64
|
||||
; Calls llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64 with %bar and %data and
; stores the returned i64 through %out. Both assertion sets below expect the
; instruction to take its address from v2 and its data from v[0:1], and expect
; an s_wait_dscnt before the flat store of the result. They differ only in the
; registers used by the v_dual_mov_b32 copy and the store's address operand.
define void @test_ds_atomic_barrier_arrive_rtn_b64(i64 %data, ptr addrspace(3) %bar, ptr %out) {
|
||||
; SDAG-LABEL: test_ds_atomic_barrier_arrive_rtn_b64:
|
||||
; SDAG: ; %bb.0: ; %entry
|
||||
; SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; SDAG-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
|
||||
; SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
||||
; SDAG-NEXT: s_wait_dscnt 0x0
|
||||
; SDAG-NEXT: flat_store_b64 v[4:5], v[0:1]
|
||||
; SDAG-NEXT: s_wait_dscnt 0x0
|
||||
; SDAG-NEXT: s_set_pc_i64 s[30:31]
|
||||
;
|
||||
; GISEL-LABEL: test_ds_atomic_barrier_arrive_rtn_b64:
|
||||
; GISEL: ; %bb.0: ; %entry
|
||||
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GISEL-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
|
||||
; GISEL-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4
|
||||
; GISEL-NEXT: s_wait_dscnt 0x0
|
||||
; GISEL-NEXT: flat_store_b64 v[6:7], v[0:1]
|
||||
; GISEL-NEXT: s_wait_dscnt 0x0
|
||||
; GISEL-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%ret = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %bar, i64 %data)
|
||||
store i64 %ret, ptr %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_ds_atomic_barrier_arrive_rtn_b64_off:
|
||||
; GCN: ds_atomic_barrier_arrive_rtn_b64 v[{{[0-9:]+}}], v0, v[{{[0-9:]+}}] offset:8184{{$}}
|
||||
; GCN: s_wait_dscnt 0x0
|
||||
; GCN: flat_store_b64
|
||||
; Variant with a constant data operand (i64 512) and an address that is %in
; advanced by 1023 i64 elements. Both assertion sets below expect the constant
; to be materialized with v_mov_b64 (0x200) and the 8184-byte displacement
; (1023 * 8) to be carried in the instruction's offset:8184 operand.
define void @test_ds_atomic_barrier_arrive_rtn_b64_off(ptr addrspace(3) %in, ptr %out) {
|
||||
; SDAG-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_off:
|
||||
; SDAG: ; %bb.0: ; %entry
|
||||
; SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; SDAG-NEXT: s_wait_kmcnt 0x0
|
||||
; SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x200
|
||||
; SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; SDAG-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v0, v[4:5] offset:8184
|
||||
; SDAG-NEXT: s_wait_dscnt 0x0
|
||||
; SDAG-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; SDAG-NEXT: s_wait_dscnt 0x0
|
||||
; SDAG-NEXT: s_set_pc_i64 s[30:31]
|
||||
;
|
||||
; GISEL-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_off:
|
||||
; GISEL: ; %bb.0: ; %entry
|
||||
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GISEL-NEXT: s_wait_kmcnt 0x0
|
||||
; GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GISEL-NEXT: v_mov_b64_e32 v[2:3], 0x200
|
||||
; GISEL-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v0, v[2:3] offset:8184
|
||||
; GISEL-NEXT: s_wait_dscnt 0x0
|
||||
; GISEL-NEXT: flat_store_b64 v[4:5], v[0:1]
|
||||
; GISEL-NEXT: s_wait_dscnt 0x0
|
||||
; GISEL-NEXT: s_set_pc_i64 s[30:31]
|
||||
entry:
|
||||
%bar = getelementptr i64, ptr addrspace(3) %in, i32 1023
|
||||
%ret = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %bar, i64 512)
|
||||
store i64 %ret, ptr %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; amdgpu_ps variant in which all three arguments are marked inreg. Both
; assertion sets below expect the address and data to be moved from s-registers
; into v2 and v[0:1] before the DS instruction, and expect the flat store of
; the result to use an s-register pair as its base address.
define amdgpu_ps void @test_ds_atomic_barrier_arrive_rtn_b64_ss(ptr addrspace(3) inreg %bar, i64 inreg %data, ptr inreg %out) {
|
||||
; SDAG-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_ss:
|
||||
; SDAG: ; %bb.0: ; %entry
|
||||
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
|
||||
; SDAG-NEXT: v_mov_b32_e32 v2, s0
|
||||
; SDAG-NEXT: s_mov_b32 s5, s4
|
||||
; SDAG-NEXT: s_mov_b32 s4, s3
|
||||
; SDAG-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
|
||||
; SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; SDAG-NEXT: s_wait_dscnt 0x0
|
||||
; SDAG-NEXT: flat_store_b64 v2, v[0:1], s[4:5]
|
||||
; SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GISEL-LABEL: test_ds_atomic_barrier_arrive_rtn_b64_ss:
|
||||
; GISEL: ; %bb.0: ; %entry
|
||||
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GISEL-NEXT: s_mov_b32 s6, s1
|
||||
; GISEL-NEXT: s_mov_b32 s7, s2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
|
||||
; GISEL-NEXT: s_mov_b32 s2, s3
|
||||
; GISEL-NEXT: s_mov_b32 s3, s4
|
||||
; GISEL-NEXT: ds_atomic_barrier_arrive_rtn_b64 v[0:1], v2, v[0:1]
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GISEL-NEXT: s_wait_dscnt 0x0
|
||||
; GISEL-NEXT: flat_store_b64 v2, v[0:1], s[2:3]
|
||||
; GISEL-NEXT: s_endpgm
|
||||
entry:
|
||||
%ret = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %bar, i64 %data)
|
||||
store i64 %ret, ptr %out
|
||||
ret void
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user