Files
llvm-project/llvm/test/CodeGen/AMDGPU/integer-canonicalizing-src-modifiers.ll
Matt Arsenault 056e5a32c8 AMDGPU: Change ABI of 16-bit scalar values for gfx6/gfx7 (#175795)
Keep bf16/f16 values encoded as the low half of a 32-bit register,
instead of promoting to float. This avoids unwanted FP effects
from the fpext/fptrunc which should not be implied by just
passing an argument. This also fixes ABI divergence between
SelectionDAG and GlobalISel.

I've wanted to make this change for ages, and failed the last
few times. The main complication was the hack to return
shader integer types in SGPRs, which now needs to inspect
the underlying IR type.
2026-01-22 18:34:06 +00:00

234 lines
8.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
; Demonstrate that the conversion of integer bitmasks affecting the sign bit into source
; modifiers (srcmods) is not applied to operands of canonicalizing instructions.
; i32 argument (not inreg) with bit 31 cleared, then converted with uitofp to double.
; The assertions below expect the mask to stay an explicit v_and_b32 with 0x7fffffff
; in front of v_cvt_f64_u32, i.e. it is not turned into a source modifier.
define double @v_uitofp_i32_to_f64_abs(i32 %arg0) nounwind {
; GCN-LABEL: v_uitofp_i32_to_f64_abs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_i32_to_f64_abs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Mask off bit 31 of the 32-bit input.
%arg0.abs = and i32 %arg0, u0x7fffffff
%cvt = uitofp i32 %arg0.abs to double
ret double %cvt
}
; i32 argument (not inreg) masked down to bit 31 only, then converted with uitofp to
; double. The assertions below expect an explicit v_and_b32 with 0x80000000 followed by
; v_cvt_f64_u32 on the masked value.
define double @v_uitofp_i32_to_f64_neg(i32 %arg0) nounwind {
; GCN-LABEL: v_uitofp_i32_to_f64_neg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_i32_to_f64_neg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 0x80000000, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Keep only bit 31 of the 32-bit input.
%arg0.neg = and i32 %arg0, u0x80000000
%cvt = uitofp i32 %arg0.neg to double
ret double %cvt
}
; Same pattern as the non-inreg abs case, but the i32 argument is marked inreg. The
; assertions below expect bit 31 to be cleared with s_bitset0_b32 on the incoming
; scalar register (s16 on the gfx700/gfx900 runs, s0 on the gfx1100 runs) before
; v_cvt_f64_u32 reads that register.
define double @s_uitofp_i32_to_f64_abs(i32 inreg %arg0) nounwind {
; GCN-LABEL: s_uitofp_i32_to_f64_abs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_bitset0_b32 s16, 31
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], s16
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_uitofp_i32_to_f64_abs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_bitset0_b32 s0, 31
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Mask off bit 31 of the 32-bit input.
%arg0.abs = and i32 %arg0, u0x7fffffff
%cvt = uitofp i32 %arg0.abs to double
ret double %cvt
}
; inreg i32 argument masked down to bit 31 only, then converted with uitofp to double.
; The assertions below expect an explicit s_and_b32 with 0x80000000 whose scalar result
; is the operand of v_cvt_f64_u32.
define double @s_uitofp_i32_to_f64_neg(i32 inreg %arg0) nounwind {
; GCN-LABEL: s_uitofp_i32_to_f64_neg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_and_b32 s4, s16, 0x80000000
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], s4
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_uitofp_i32_to_f64_neg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, s0, 0x80000000
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Keep only bit 31 of the 32-bit input.
%arg0.neg = and i32 %arg0, u0x80000000
%cvt = uitofp i32 %arg0.neg to double
ret double %cvt
}
; 16-bit variant: i16 argument (not inreg) with bit 15 cleared, then converted with
; uitofp to half. Expected output differs per run line:
;   - gfx700 converts through f32 (v_cvt_f32_u32 followed by v_cvt_f16_f32);
;   - gfx900 and gfx1100 use v_cvt_f16_u16 directly;
;   - with +real-true16 the mask is a 16-bit v_and_b16 on v0.l.
; In every case the 0x7fff mask remains an explicit AND instruction.
define half @v_uitofp_i16_to_f16_abs(i16 %arg0) nounwind {
; GFX7-LABEL: v_uitofp_i16_to_f16_abs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uitofp_i16_to_f16_abs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_uitofp_i16_to_f16_abs:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_uitofp_i16_to_f16_abs:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
; Mask off bit 15 of the 16-bit input.
%arg0.abs = and i16 %arg0, u0x7fff
%cvt = uitofp i16 %arg0.abs to half
ret half %cvt
}
; 16-bit variant: i16 argument (not inreg) masked down to bit 15 only, then converted
; with uitofp to half. The mask stays an explicit AND on every run line. Note that the
; expected immediate is printed as 0xffff8000 on the gfx900 and gfx1100 -real-true16
; runs (32-bit v_and_b32), and as 0x8000 on the gfx700 and gfx1100 +real-true16 runs.
define half @v_uitofp_i16_to_f16_neg(i16 %arg0) nounwind {
; GFX7-LABEL: v_uitofp_i16_to_f16_neg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, 0x8000, v0
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uitofp_i16_to_f16_neg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff8000, v0
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_uitofp_i16_to_f16_neg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_uitofp_i16_to_f16_neg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff8000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
; Keep only bit 15 of the 16-bit input.
%arg0.neg = and i16 %arg0, u0x8000
%cvt = uitofp i16 %arg0.neg to half
ret half %cvt
}
; 16-bit variant with an inreg i16 argument: bit 15 is cleared, then the value is
; converted with uitofp to half. Every run line expects an explicit s_and_b32 with
; 0x7fff whose scalar result feeds the conversion (via f32 on gfx700, v_cvt_f16_u16
; on gfx900 and gfx1100).
define half @s_uitofp_i16_to_f16_abs(i16 inreg %arg0) nounwind {
; GFX7-LABEL: s_uitofp_i16_to_f16_abs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_and_b32 s4, s16, 0x7fff
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, s4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_uitofp_i16_to_f16_abs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s4, s16, 0x7fff
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: s_uitofp_i16_to_f16_abs:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0x7fff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: s_uitofp_i16_to_f16_abs:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0x7fff
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, s0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
; Mask off bit 15 of the 16-bit input.
%arg0.abs = and i16 %arg0, u0x7fff
%cvt = uitofp i16 %arg0.abs to half
ret half %cvt
}
; 16-bit variant with an inreg i16 argument: the value is masked down to bit 15 only,
; then converted with uitofp to half. Every run line expects an explicit s_and_b32 with
; 0x8000 whose scalar result feeds the conversion (via f32 on gfx700, v_cvt_f16_u16
; on gfx900 and gfx1100).
define half @s_uitofp_i16_to_f16_neg(i16 inreg %arg0) nounwind {
; GFX7-LABEL: s_uitofp_i16_to_f16_neg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_and_b32 s4, s16, 0x8000
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, s4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_uitofp_i16_to_f16_neg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s4, s16, 0x8000
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: s_uitofp_i16_to_f16_neg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0x8000
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: s_uitofp_i16_to_f16_neg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0x8000
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, s0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
; Keep only bit 15 of the 16-bit input.
%arg0.neg = and i16 %arg0, u0x8000
%cvt = uitofp i16 %arg0.neg to half
ret half %cvt
}