The pattern was guarded by the D16PreservesUnusedBits predicate, which is not needed for stores.
1253 lines
62 KiB
LLVM
1253 lines
62 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-FAKE16 %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-FAKE16 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-REAL16 %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-REAL16 %s
|
|
|
|
; Test using saddr addressing mode of flat_*store_* instructions.
|
|
|
|
; Store an i8 to %sbase plus a zero-extended 32-bit offset that is itself
; loaded from %voffset.ptr. The checks expect the saddr form of flat_store_b8:
; the loaded offset in v0, the data in v2 and an SGPR pair (s[2:3]) as base.
define amdgpu_ps void @flat_store_saddr_i8_zext_vgpr(ptr inreg %sbase, ptr %voffset.ptr, i8 %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_i8_zext_vgpr:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: flat_store_b8 v0, v2, s[2:3]
|
|
; GFX1250-NEXT: s_endpgm
|
|
%voffset = load i32, ptr %voffset.ptr
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store i8 %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Maximum positive offset on gfx10 (2047); gfx1250 still folds it into the immediate offset field
|
|
; i8 store to %sbase + zext(loaded offset) + 2047. The checks expect the
; constant GEP to be folded into the store's immediate field (offset:2047)
; while the saddr form is still used.
define amdgpu_ps void @flat_store_saddr_i8_zext_vgpr_offset_2047(ptr inreg %sbase, ptr %voffset.ptr, i8 %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_i8_zext_vgpr_offset_2047:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: flat_store_b8 v0, v2, s[2:3] offset:2047
|
|
; GFX1250-NEXT: s_endpgm
|
|
%voffset = load i32, ptr %voffset.ptr
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 2047
|
|
store i8 %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Maximum negative offset on gfx10 (-2048); gfx1250 still folds it into the immediate offset field
|
|
; i8 store to %sbase + zext(loaded offset) - 2048. The checks expect the
; negative constant to be folded into the store's immediate field
; (offset:-2048) while the saddr form is still used.
define amdgpu_ps void @flat_store_saddr_i8_zext_vgpr_offset_neg2048(ptr inreg %sbase, ptr %voffset.ptr, i8 %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_i8_zext_vgpr_offset_neg2048:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: flat_store_b8 v0, v2, s[2:3] offset:-2048
|
|
; GFX1250-NEXT: s_endpgm
|
|
%voffset = load i32, ptr %voffset.ptr
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -2048
|
|
store i8 %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; --------------------------------------------------------------------------------
|
|
; Uniformity edge cases
|
|
; --------------------------------------------------------------------------------
|
|
|
|
@ptr.in.lds = internal addrspace(3) global ptr poison
|
|
|
|
; Base pointer is uniform, but also in VGPRs
|
|
; The base pointer is loaded from LDS (@ptr.in.lds), so it arrives in VGPRs
; v[2:3]. The two selectors are expected to differ here:
;  - SelectionDAG moves the pointer into s[0:1] with v_readfirstlane_b32 and
;    then uses the saddr form of flat_store_b8.
;  - GlobalISel adds the offset with a 64-bit VALU add and uses the plain
;    VGPR-address form of flat_store_b8.
define amdgpu_ps void @flat_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_uniform_ptr_in_vgprs:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX1250-SDAG-NEXT: ds_load_b64 v[2:3], v2
|
|
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2
|
|
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3
|
|
; GFX1250-SDAG-NEXT: flat_store_b8 v0, v1, s[0:1]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_uniform_ptr_in_vgprs:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX1250-GISEL-NEXT: ds_load_b64 v[2:3], v2
|
|
; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v2, v0
|
|
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo
|
|
; GFX1250-GISEL-NEXT: flat_store_b8 v[2:3], v1
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%sbase = load ptr, ptr addrspace(3) @ptr.in.lds
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store i8 %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Base pointer is uniform, but also in VGPRs, with imm offset
|
|
; Base pointer loaded from LDS (so it arrives in VGPRs) plus a constant -120
; byte GEP. Both selectors are expected to fold the constant into offset:-120;
; SelectionDAG additionally moves the base into s[0:1] via
; v_readfirstlane_b32 to use the saddr form, while GlobalISel keeps the
; VGPR-address form after a 64-bit VALU add.
define amdgpu_ps void @flat_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_uniform_ptr_in_vgprs_immoffset:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX1250-SDAG-NEXT: ds_load_b64 v[2:3], v2
|
|
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2
|
|
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3
|
|
; GFX1250-SDAG-NEXT: flat_store_b8 v0, v1, s[0:1] offset:-120
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_uniform_ptr_in_vgprs_immoffset:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX1250-GISEL-NEXT: ds_load_b64 v[2:3], v2
|
|
; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v2, v0
|
|
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo
|
|
; GFX1250-GISEL-NEXT: flat_store_b8 v[2:3], v1 offset:-120
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%sbase = load ptr, ptr addrspace(3) @ptr.in.lds
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -120
|
|
store i8 %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; --------------------------------------------------------------------------------
|
|
; Stress various type stores
|
|
; --------------------------------------------------------------------------------
|
|
|
|
; i16 store to %sbase + zext(%voffset). One shared check prefix covers all
; four RUN lines: a single saddr-form flat_store_b16 with the offset in v0 and
; the data in v1.
define amdgpu_ps void @flat_store_saddr_i16_zext_vgpr(ptr inreg %sbase, i32 %voffset, i16 %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_i16_zext_vgpr:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b16 v0, v1, s[2:3]
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store i16 %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; i16 store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b16 with the constant folded into offset:-128.
define amdgpu_ps void @flat_store_saddr_i16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, i16 %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_i16_zext_vgpr_offset_neg128:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b16 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store i16 %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; half store to %sbase + zext(%voffset). Expected code is the same as for
; i16: a single saddr-form flat_store_b16 (offset v0, data v1) for all four
; RUN lines.
define amdgpu_ps void @flat_store_saddr_f16_zext_vgpr(ptr inreg %sbase, i32 %voffset, half %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_f16_zext_vgpr:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b16 v0, v1, s[2:3]
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store half %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; half store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b16 with the constant folded into offset:-128.
define amdgpu_ps void @flat_store_saddr_f16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, half %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_f16_zext_vgpr_offset_neg128:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b16 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store half %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; i32 store to %sbase + zext(%voffset). Expects a single saddr-form
; flat_store_b32 with the offset in v0 and the data in v1.
define amdgpu_ps void @flat_store_saddr_i32_zext_vgpr(ptr inreg %sbase, i32 %voffset, i32 %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_i32_zext_vgpr:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3]
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store i32 %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; i32 store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b32 with the constant folded into offset:-128.
define amdgpu_ps void @flat_store_saddr_i32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_i32_zext_vgpr_offset_neg128:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store i32 %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; float store to %sbase + zext(%voffset). Expected code is the same as for
; i32: a single saddr-form flat_store_b32 (offset v0, data v1).
define amdgpu_ps void @flat_store_saddr_f32_zext_vgpr(ptr inreg %sbase, i32 %voffset, float %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_f32_zext_vgpr:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3]
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store float %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; float store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b32 with the constant folded into offset:-128.
define amdgpu_ps void @flat_store_saddr_f32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, float %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_f32_zext_vgpr_offset_neg128:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store float %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Store of an addrspace(3) pointer value to %sbase + zext(%voffset). The
; checks expect it to be emitted as a 32-bit store: a single saddr-form
; flat_store_b32 (offset v0, data v1).
define amdgpu_ps void @flat_store_saddr_p3_zext_vgpr(ptr inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_p3_zext_vgpr:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3]
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store ptr addrspace(3) %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Store of an addrspace(3) pointer value to %sbase + zext(%voffset) - 128.
; Expects a saddr-form flat_store_b32 with the constant folded into
; offset:-128.
define amdgpu_ps void @flat_store_saddr_p3_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
|
|
; GFX1250-LABEL: flat_store_saddr_p3_zext_vgpr_offset_neg128:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store ptr addrspace(3) %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; i64 store to %sbase + zext(%voffset). Both selectors are expected to use the
; saddr form of flat_store_b64, but first copy the data out of v1/v2 into
; another register pair: v[2:3] for SelectionDAG, v[4:5] for GlobalISel.
; NOTE(review): the copy is presumably needed to get an even-aligned VGPR
; pair for the 64-bit data operand - confirm.
define amdgpu_ps void @flat_store_saddr_i64_zext_vgpr(ptr inreg %sbase, i32 %voffset, i64 %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_i64_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_i64_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store i64 %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; i64 store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b64 with offset:-128, after the data is copied out of v1/v2 into
; v[2:3] (SelectionDAG) or v[4:5] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_i64_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, i64 %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_i64_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_i64_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store i64 %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; double store to %sbase + zext(%voffset). Expected code matches the i64
; case: data copied out of v1/v2 into v[2:3] (SelectionDAG) or v[4:5]
; (GlobalISel), then a saddr-form flat_store_b64.
define amdgpu_ps void @flat_store_saddr_f64_zext_vgpr(ptr inreg %sbase, i32 %voffset, double %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_f64_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_f64_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store double %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; double store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b64 with offset:-128, after the data is copied out of v1/v2 into
; v[2:3] (SelectionDAG) or v[4:5] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_f64_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, double %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_f64_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_f64_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store double %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; <2 x i32> store to %sbase + zext(%voffset). Expected as one 64-bit store:
; data copied out of v1/v2 into v[2:3] (SelectionDAG) or v[4:5] (GlobalISel),
; then a saddr-form flat_store_b64.
define amdgpu_ps void @flat_store_saddr_v2i32_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x i32> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2i32_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2i32_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <2 x i32> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; <2 x i32> store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b64 with offset:-128, after the data is copied out of v1/v2 into
; v[2:3] (SelectionDAG) or v[4:5] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_v2i32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x i32> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2i32_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2i32_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <2 x i32> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; <2 x float> store to %sbase + zext(%voffset). Expected as one 64-bit store:
; data copied out of v1/v2 into v[2:3] (SelectionDAG) or v[4:5] (GlobalISel),
; then a saddr-form flat_store_b64.
define amdgpu_ps void @flat_store_saddr_v2f32_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x float> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2f32_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2f32_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <2 x float> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; <2 x float> store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b64 with offset:-128, after the data is copied out of v1/v2 into
; v[2:3] (SelectionDAG) or v[4:5] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_v2f32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x float> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2f32_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2f32_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <2 x float> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; <4 x i16> store to %sbase + zext(%voffset). Expected as one 64-bit store:
; data copied out of v1/v2 into v[2:3] (SelectionDAG) or v[4:5] (GlobalISel),
; then a saddr-form flat_store_b64.
define amdgpu_ps void @flat_store_saddr_v4i16_zext_vgpr(ptr inreg %sbase, i32 %voffset, <4 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4i16_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4i16_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <4 x i16> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; <4 x i16> store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b64 with offset:-128, after the data is copied out of v1/v2 into
; v[2:3] (SelectionDAG) or v[4:5] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_v4i16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <4 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4i16_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4i16_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <4 x i16> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; <4 x half> store to %sbase + zext(%voffset). Expected as one 64-bit store:
; data copied out of v1/v2 into v[2:3] (SelectionDAG) or v[4:5] (GlobalISel),
; then a saddr-form flat_store_b64.
define amdgpu_ps void @flat_store_saddr_v4f16_zext_vgpr(ptr inreg %sbase, i32 %voffset, <4 x half> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4f16_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4f16_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <4 x half> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; <4 x half> store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b64 with offset:-128, after the data is copied out of v1/v2 into
; v[2:3] (SelectionDAG) or v[4:5] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_v4f16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <4 x half> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4f16_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4f16_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <4 x half> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Store of an addrspace(1) pointer value to %sbase + zext(%voffset). The data
; operand is typed ptr addrspace(1) so that the test matches its "p1" name
; (the sibling p3 tests use ptr addrspace(3) the same way). Expected code is
; one 64-bit store: data copied out of v1/v2 into v[2:3] (SelectionDAG) or
; v[4:5] (GlobalISel), then a saddr-form flat_store_b64.
define amdgpu_ps void @flat_store_saddr_p1_zext_vgpr(ptr inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_p1_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_p1_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store ptr addrspace(1) %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Store of an addrspace(1) pointer value to %sbase + zext(%voffset) - 128.
; The data operand is typed ptr addrspace(1) so that the test matches its
; "p1" name (the sibling p3 tests use ptr addrspace(3) the same way).
; Expects a saddr-form flat_store_b64 with offset:-128, after the data is
; copied out of v1/v2 into v[2:3] (SelectionDAG) or v[4:5] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_p1_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_p1_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_p1_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store ptr addrspace(1) %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; <3 x i32> store to %sbase + zext(%voffset). Expected as one 96-bit store:
; data copied out of v1..v3 into v[2:4] (SelectionDAG) or v[4:6] (GlobalISel),
; then a saddr-form flat_store_b96.
define amdgpu_ps void @flat_store_saddr_v3i32_zext_vgpr(ptr inreg %sbase, i32 %voffset, <3 x i32> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v3i32_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v3i32_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <3 x i32> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; <3 x i32> store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b96 with offset:-128, after the data is copied out of v1..v3 into
; v[2:4] (SelectionDAG) or v[4:6] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_v3i32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <3 x i32> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v3i32_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v3i32_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <3 x i32> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; <3 x float> store to %sbase + zext(%voffset). Expected as one 96-bit store:
; data copied out of v1..v3 into v[2:4] (SelectionDAG) or v[4:6] (GlobalISel),
; then a saddr-form flat_store_b96.
define amdgpu_ps void @flat_store_saddr_v3f32_zext_vgpr(ptr inreg %sbase, i32 %voffset, <3 x float> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v3f32_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v3f32_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <3 x float> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; <3 x float> store to %sbase + zext(%voffset) - 128. Expects a saddr-form
; flat_store_b96 with offset:-128, after the data is copied out of v1..v3 into
; v[2:4] (SelectionDAG) or v[4:6] (GlobalISel).
define amdgpu_ps void @flat_store_saddr_v3f32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <3 x float> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v3f32_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v3f32_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <3 x float> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <6 x i16> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b96 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v6i16_zext_vgpr(ptr inreg %sbase, i32 %voffset, <6 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v6i16_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v6i16_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <6 x i16> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <6 x i16> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b96 for both selectors.
define amdgpu_ps void @flat_store_saddr_v6i16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <6 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v6i16_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v6i16_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <6 x i16> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <6 x half> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b96 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v6f16_zext_vgpr(ptr inreg %sbase, i32 %voffset, <6 x half> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v6f16_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v6f16_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <6 x half> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <6 x half> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b96 for both selectors.
define amdgpu_ps void @flat_store_saddr_v6f16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <6 x half> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v6f16_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2
|
|
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b96 v0, v[2:4], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v6f16_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v6, v3
|
|
; GFX1250-GISEL-NEXT: flat_store_b96 v0, v[4:6], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <6 x half> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i32> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b128 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v4i32_zext_vgpr(ptr inreg %sbase, i32 %voffset, <4 x i32> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4i32_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4i32_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <4 x i32> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i32> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
define amdgpu_ps void @flat_store_saddr_v4i32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <4 x i32> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4i32_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4i32_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <4 x i32> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x float> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b128 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v4f32_zext_vgpr(ptr inreg %sbase, i32 %voffset, <4 x float> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4f32_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4f32_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <4 x float> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x float> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
define amdgpu_ps void @flat_store_saddr_v4f32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <4 x float> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4f32_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4f32_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <4 x float> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x i64> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b128 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v2i64_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x i64> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2i64_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2i64_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <2 x i64> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x i64> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
define amdgpu_ps void @flat_store_saddr_v2i64_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x i64> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2i64_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2i64_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <2 x i64> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x double> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b128 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v2f64_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x double> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2f64_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2f64_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <2 x double> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x double> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
define amdgpu_ps void @flat_store_saddr_v2f64_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x double> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2f64_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2f64_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <2 x double> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x i16> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b128 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v8i16_zext_vgpr(ptr inreg %sbase, i32 %voffset, <8 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v8i16_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v8i16_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <8 x i16> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x i16> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
define amdgpu_ps void @flat_store_saddr_v8i16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <8 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v8i16_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v8i16_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <8 x i16> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x half> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b128 with v0 and s[2:3] as the address operands; the SDAG and GISEL
; checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v8f16_zext_vgpr(ptr inreg %sbase, i32 %voffset, <8 x half> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v8f16_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v8f16_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <8 x half> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x half> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
define amdgpu_ps void @flat_store_saddr_v8f16_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <8 x half> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v8f16_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v8f16_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <8 x half> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x ptr> at %sbase + zext(%voffset). Both selectors are expected to emit a
; single flat_store_b128 with v0 and s[2:3] as the address operands.
; NOTE(review): the name says "v2p1", but the stored type is <2 x ptr> with no addrspace
; qualifier -- confirm whether the name or the type is the intended one.
define amdgpu_ps void @flat_store_saddr_v2p1_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x ptr> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2p1_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2p1_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <2 x ptr> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x ptr> at %sbase + zext(%voffset) - 128. The constant -128 byte GEP is
; expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
; NOTE(review): the name says "v2p1", but the stored type is <2 x ptr> with no addrspace
; qualifier -- confirm whether the name or the type is the intended one.
define amdgpu_ps void @flat_store_saddr_v2p1_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x ptr> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v2p1_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v2p1_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <2 x ptr> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x ptr addrspace(3)> at %sbase + zext(%voffset). Both selectors are expected
; to emit a single flat_store_b128 with v0 and s[2:3] as the address operands; the SDAG and
; GISEL checks differ only in the VGPR copies that assemble the data tuple.
define amdgpu_ps void @flat_store_saddr_v4p3_zext_vgpr(ptr inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4p3_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4p3_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3]
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store <4 x ptr addrspace(3)> %data, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x ptr addrspace(3)> at %sbase + zext(%voffset) - 128. The constant -128 byte
; GEP is expected to appear as offset:-128 on a single flat_store_b128 for both selectors.
define amdgpu_ps void @flat_store_saddr_v4p3_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_v4p3_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: flat_store_b128 v0, v[2:5], s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: flat_store_saddr_v4p3_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v7, v2
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
|
|
; GFX1250-GISEL-NEXT: flat_store_b128 v0, v[6:9], s[2:3] offset:-128
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store <4 x ptr addrspace(3)> %data, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; --------------------------------------------------------------------------------
|
|
; Atomic store
|
|
; --------------------------------------------------------------------------------
|
|
|
|
; seq_cst atomic i32 store at %sbase + zext(%voffset). Checked under the shared GFX1250
; prefix: a global_wb and s_wait_storecnt are expected before a flat_store_b32 that
; carries scope:SCOPE_SYS.
define amdgpu_ps void @atomic_flat_store_saddr_i32_zext_vgpr(ptr inreg %sbase, i32 %voffset, i32 %data) {
|
|
; GFX1250-LABEL: atomic_flat_store_saddr_i32_zext_vgpr:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store atomic i32 %data, ptr %gep0 seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; seq_cst atomic i32 store at %sbase + zext(%voffset) - 128. Checked under the shared
; GFX1250 prefix: the -128 byte GEP is expected to appear as offset:-128 on the
; flat_store_b32, after the global_wb / s_wait_storecnt pair.
define amdgpu_ps void @atomic_flat_store_saddr_i32_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
|
|
; GFX1250-LABEL: atomic_flat_store_saddr_i32_zext_vgpr_offset_neg128:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store atomic i32 %data, ptr %gep1 seq_cst, align 4
|
|
ret void
|
|
}
|
|
|
|
; seq_cst atomic i64 store at %sbase + zext(%voffset). Both selectors are expected to emit
; global_wb and s_wait_storecnt before a flat_store_b64 with scope:SCOPE_SYS; the SDAG and
; GISEL checks differ only in the VGPR copies that assemble the 64-bit data pair.
define amdgpu_ps void @atomic_flat_store_saddr_i64_zext_vgpr(ptr inreg %sbase, i32 %voffset, i64 %data) {
|
|
; GFX1250-SDAG-LABEL: atomic_flat_store_saddr_i64_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_SYS
|
|
; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] scope:SCOPE_SYS
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: atomic_flat_store_saddr_i64_zext_vgpr:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_SYS
|
|
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] scope:SCOPE_SYS
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
store atomic i64 %data, ptr %gep0 seq_cst, align 8
|
|
ret void
|
|
}
|
|
|
|
; seq_cst atomic i64 store at %sbase + zext(%voffset) - 128. The -128 byte GEP is expected
; to appear as offset:-128 on the flat_store_b64 (scope:SCOPE_SYS) for both selectors,
; after the global_wb / s_wait_storecnt pair.
define amdgpu_ps void @atomic_flat_store_saddr_i64_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, i64 %data) {
|
|
; GFX1250-SDAG-LABEL: atomic_flat_store_saddr_i64_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
|
; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_SYS
|
|
; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-SDAG-NEXT: flat_store_b64 v0, v[2:3], s[2:3] offset:-128 scope:SCOPE_SYS
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-LABEL: atomic_flat_store_saddr_i64_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL: ; %bb.0:
|
|
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
|
; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_SYS
|
|
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-GISEL-NEXT: flat_store_b64 v0, v[4:5], s[2:3] offset:-128 scope:SCOPE_SYS
|
|
; GFX1250-GISEL-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
store atomic i64 %data, ptr %gep1 seq_cst, align 8
|
|
ret void
|
|
}
|
|
|
|
; --------------------------------------------------------------------------------
|
|
; D16 HI store (hi 16)
|
|
; --------------------------------------------------------------------------------
|
|
|
|
; Stores element 1 (the high half) of a <2 x i16> as an i16 at %sbase + zext(%voffset).
; SDAG (shared prefix) and GISEL-FAKE16 are expected to use flat_store_d16_hi_b16 directly
; on v1. GISEL-REAL16 is instead expected to shift v1 right by 16 and use flat_store_b16.
define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
|
|
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
|
; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
|
|
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
|
|
; GFX1250-GISEL-REAL16: ; %bb.0:
|
|
; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3]
|
|
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%data.hi = extractelement <2 x i16> %data, i32 1
|
|
store i16 %data.hi, ptr %gep0
|
|
ret void
|
|
}
|
|
|
|
; Stores element 1 (the high half) of a <2 x i16> as an i16 at %sbase + zext(%voffset) - 128.
; SDAG (shared prefix) and GISEL-FAKE16 are expected to use flat_store_d16_hi_b16 with
; offset:-128. GISEL-REAL16 is instead expected to shift v1 right by 16 and use
; flat_store_b16 with the same offset.
define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
|
|
; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
|
|
; GFX1250-SDAG: ; %bb.0:
|
|
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
|
; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
|
|
; GFX1250-GISEL-REAL16: ; %bb.0:
|
|
; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3] offset:-128
|
|
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
|
%zext.offset = zext i32 %voffset to i64
|
|
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
|
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
|
%data.hi = extractelement <2 x i16> %data, i32 1
|
|
store i16 %data.hi, ptr %gep1
|
|
ret void
|
|
}
|
|
|
|
; Stores the low byte of element 1 (the high half) of a <2 x i16> to an
; address formed from the inreg base pointer plus a zero-extended 32-bit
; offset.
; All four RUN configurations share one set of assertions here and are
; expected to select flat_store_d16_hi_b8 with no extra shift.
define amdgpu_ps void @flat_store_saddr_i16_d16hi_trunci8_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GFX1250-LABEL: flat_store_saddr_i16_d16hi_trunci8_zext_vgpr:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT: flat_store_d16_hi_b8 v0, v1, s[2:3]
; GFX1250-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
  %data.hi = extractelement <2 x i16> %data, i32 1
  %data.hi.trunc = trunc i16 %data.hi to i8
  store i8 %data.hi.trunc, ptr %gep0
  ret void
}
|
|
|
|
; Stores the low byte of element 1 (the high half) of a <2 x i16> to an
; address formed from the inreg base pointer, a zero-extended 32-bit offset,
; and a constant byte offset of -128.
; All four RUN configurations share one set of assertions here and are
; expected to select flat_store_d16_hi_b8 with the -128 folded into the
; instruction's offset field.
define amdgpu_ps void @flat_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GFX1250-LABEL: flat_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT: flat_store_d16_hi_b8 v0, v1, s[2:3] offset:-128
; GFX1250-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
  %data.hi = extractelement <2 x i16> %data, i32 1
  %data.hi.trunc = trunc i16 %data.hi to i8
  store i8 %data.hi.trunc, ptr %gep1
  ret void
}
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX1250-SDAG-FAKE16: {{.*}}
; GFX1250-SDAG-REAL16: {{.*}}
|