Files
llvm-project/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll
Petar Avramovic 40270e8ef2 AMDGPU/GlobalISel: Add regbanklegalize rules for load and store (#153176)
Cover all the missing cases and add very detailed tests for each rule.
In summary:
- Flat and Scratch, addrspace(0) and addrspace(5), loads are always
  divergent.
- Global and Constant, addrspace(1) and addrspace(4), have real uniform
  loads (s_load), but require additional checks for alignment and flags in
  the MMO. For a non-naturally-aligned or non-uniform MMO, do uniform-in-vgpr
  lowering.
- Local (LDS), addrspace(3), only has instructions for divergent loads; for
  uniform loads do uniform-in-vgpr lowering.
- Store rules are simplified using Ptr32 and Ptr64.
  All operands need to be vgpr.

Some tests have code-size regressions since they use more sgpr instructions;
these are marked with FixMe comments to be revisited later.
2025-09-11 11:26:20 +02:00

47 lines
1.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GISEL
; Forms an address one i32 element (4 bytes) below %stack in the entry block,
; then branches on %flag:
;   - %flag == 0 (bb1): stores 1 through that address and returns 0.
;   - %flag != 0 (bb2): loads an i32 through that address and returns it.
; In the expected output of both run lines the entry block contains only the
; wait, compare and branch; the -4 byte displacement shows up as the immediate
; offset of the scratch_store / scratch_load inside each successor block.
define amdgpu_gfx i32 @sink_scratch_pointer(ptr addrspace(5) %stack, i32 inreg %flag) {
; GCN-LABEL: sink_scratch_pointer:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc0 .LBB0_2
; GCN-NEXT: ; %bb.1: ; %bb2
; GCN-NEXT: scratch_load_b32 v0, v0, off offset:-4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .LBB0_2: ; %bb1
; GCN-NEXT: v_mov_b32_e32 v1, 1
; GCN-NEXT: scratch_store_b32 v0, v1, off offset:-4
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: sink_scratch_pointer:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
; GISEL-NEXT: ; %bb.1: ; %bb2
; GISEL-NEXT: scratch_load_b32 v0, v0, off offset:-4
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: .LBB0_2: ; %bb1
; GISEL-NEXT: v_mov_b32_e32 v1, 1
; GISEL-NEXT: scratch_store_b32 v0, v1, off offset:-4
; GISEL-NEXT: v_mov_b32_e32 v0, 0
; GISEL-NEXT: s_setpc_b64 s[30:31]
; Address computed once here but only used in the two successor blocks.
%ptr = getelementptr inbounds i32, ptr addrspace(5) %stack, i32 -1
%cond = icmp eq i32 %flag, 0
br i1 %cond, label %bb1, label %bb2
; Store path: taken when %flag is zero.
bb1:
store i32 1, ptr addrspace(5) %ptr, align 4
ret i32 0
; Load path: taken when %flag is non-zero.
bb2:
%value = load i32, ptr addrspace(5) %ptr, align 4
ret i32 %value
}