Keep bf16/f16 values encoded as the low half of a 32-bit register, instead of promoting to float. This avoids unwanted FP effects from the fpext/fptrunc which should not be implied by just passing an argument. This also fixes ABI divergence between SelectionDAG and GlobalISel. I've wanted to make this change for ages, and failed the last few times. The main complication was the hack to return shader integer types in SGPRs, which now needs to inspect the underlying IR type.
234 lines
8.8 KiB
LLVM
234 lines
8.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s

; Demonstrate that the conversion of bitmasks affecting the sign bit on integers to srcmods
; does not apply to canonicalizing instructions.

; uitofp i32 -> double where the source first has bit 31 cleared by an integer
; `and` with 0x7fffffff. %arg0 is not marked inreg; the assertions expect it in
; v0 and expect the mask to remain a separate v_and_b32 ahead of v_cvt_f64_u32.
define double @v_uitofp_i32_to_f64_abs(i32 %arg0) nounwind {
; GCN-LABEL: v_uitofp_i32_to_f64_abs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_i32_to_f64_abs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %masked = and i32 %arg0, u0x7fffffff
  %conv = uitofp i32 %masked to double
  ret double %conv
}
|
|
|
|
; uitofp i32 -> double where the source is first reduced to bit 31 only by an
; integer `and` with 0x80000000. %arg0 is not marked inreg; the assertions
; expect it in v0 and expect the mask to remain a separate v_and_b32 ahead of
; v_cvt_f64_u32.
; NOTE(review): the name says `neg`, but an `and` isolates the sign bit rather
; than flipping it the way an `xor` would -- confirm this is the intended mask.
define double @v_uitofp_i32_to_f64_neg(i32 %arg0) nounwind {
; GCN-LABEL: v_uitofp_i32_to_f64_neg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_uitofp_i32_to_f64_neg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 0x80000000, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %masked = and i32 %arg0, u0x80000000
  %conv = uitofp i32 %masked to double
  ret double %conv
}
|
|
|
|
; Same pattern as the v_ variant of this test, but %arg0 is marked inreg: bit 31
; is cleared with an integer `and` (0x7fffffff) and the result is converted with
; uitofp to double. The assertions expect the argument in s16 on gfx700/gfx900
; and in s0 on gfx1100, with the bit cleared by s_bitset0_b32 before the
; v_cvt_f64_u32 reads the SGPR.
define double @s_uitofp_i32_to_f64_abs(i32 inreg %arg0) nounwind {
; GCN-LABEL: s_uitofp_i32_to_f64_abs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_bitset0_b32 s16, 31
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], s16
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_uitofp_i32_to_f64_abs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_bitset0_b32 s0, 31
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %masked = and i32 %arg0, u0x7fffffff
  %conv = uitofp i32 %masked to double
  ret double %conv
}
|
|
|
|
; inreg variant: the i32 argument is reduced to bit 31 only by an integer `and`
; with 0x80000000, then converted with uitofp to double. The assertions expect
; an explicit s_and_b32 (s16 -> s4 on gfx700/gfx900, s0 in place on gfx1100)
; feeding v_cvt_f64_u32.
; NOTE(review): the name says `neg`, but an `and` isolates the sign bit rather
; than flipping it the way an `xor` would -- confirm this is the intended mask.
define double @s_uitofp_i32_to_f64_neg(i32 inreg %arg0) nounwind {
; GCN-LABEL: s_uitofp_i32_to_f64_neg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_and_b32 s4, s16, 0x80000000
; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], s4
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_uitofp_i32_to_f64_neg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, s0, 0x80000000
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %masked = and i32 %arg0, u0x80000000
  %conv = uitofp i32 %masked to double
  ret double %conv
}
|
|
|
|
; uitofp i16 -> half where the source first has bit 15 cleared by an integer
; `and` with 0x7fff. %arg0 is not marked inreg; the assertions expect it in v0.
; Expected sequences per run line:
;   gfx700                 - 32-bit v_and, then v_cvt_f32_u32 + v_cvt_f16_f32
;   gfx900                 - 32-bit v_and, then v_cvt_f16_u16
;   gfx1100 +real-true16   - v_and_b16 on v0.l, then v_cvt_f16_u16 on v0.l
;   gfx1100 -real-true16   - 32-bit v_and, then v_cvt_f16_u16
define half @v_uitofp_i16_to_f16_abs(i16 %arg0) nounwind {
; GFX7-LABEL: v_uitofp_i16_to_f16_abs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uitofp_i16_to_f16_abs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_uitofp_i16_to_f16_abs:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_uitofp_i16_to_f16_abs:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %masked = and i16 %arg0, u0x7fff
  %conv = uitofp i16 %masked to half
  ret half %conv
}
|
|
|
|
; uitofp i16 -> half where the source is first reduced to bit 15 only by an
; integer `and` with 0x8000. %arg0 is not marked inreg; the assertions expect
; it in v0. Expected sequences per run line:
;   gfx700                 - 32-bit v_and (0x8000), v_cvt_f32_u32 + v_cvt_f16_f32
;   gfx900                 - 32-bit v_and (mask printed as 0xffff8000), v_cvt_f16_u16
;   gfx1100 +real-true16   - v_and_b16 on v0.l (0x8000), v_cvt_f16_u16 on v0.l
;   gfx1100 -real-true16   - 32-bit v_and (mask printed as 0xffff8000), v_cvt_f16_u16
; NOTE(review): the name says `neg`, but an `and` isolates the sign bit rather
; than flipping it the way an `xor` would -- confirm this is the intended mask.
define half @v_uitofp_i16_to_f16_neg(i16 %arg0) nounwind {
; GFX7-LABEL: v_uitofp_i16_to_f16_neg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, 0x8000, v0
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uitofp_i16_to_f16_neg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff8000, v0
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_uitofp_i16_to_f16_neg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x8000, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_uitofp_i16_to_f16_neg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff8000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %masked = and i16 %arg0, u0x8000
  %conv = uitofp i16 %masked to half
  ret half %conv
}
|
|
|
|
; inreg variant: the i16 argument has bit 15 cleared by an integer `and` with
; 0x7fff, then is converted with uitofp to half. Every run line expects an
; explicit s_and_b32 with 0x7fff (s16 -> s4 on gfx700/gfx900, s0 in place on
; gfx1100). gfx700 then expects v_cvt_f32_u32 + v_cvt_f16_f32; the other run
; lines expect a single v_cvt_f16_u16 reading the SGPR (writing v0.l with
; +real-true16, v0 otherwise).
define half @s_uitofp_i16_to_f16_abs(i16 inreg %arg0) nounwind {
; GFX7-LABEL: s_uitofp_i16_to_f16_abs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_and_b32 s4, s16, 0x7fff
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, s4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_uitofp_i16_to_f16_abs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s4, s16, 0x7fff
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: s_uitofp_i16_to_f16_abs:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0x7fff
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: s_uitofp_i16_to_f16_abs:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0x7fff
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, s0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %masked = and i16 %arg0, u0x7fff
  %conv = uitofp i16 %masked to half
  ret half %conv
}
|
|
|
|
; inreg variant: the i16 argument is reduced to bit 15 only by an integer `and`
; with 0x8000, then is converted with uitofp to half. Every run line expects an
; explicit s_and_b32 with 0x8000 (s16 -> s4 on gfx700/gfx900, s0 in place on
; gfx1100). gfx700 then expects v_cvt_f32_u32 + v_cvt_f16_f32; the other run
; lines expect a single v_cvt_f16_u16 reading the SGPR (writing v0.l with
; +real-true16, v0 otherwise).
; NOTE(review): the name says `neg`, but an `and` isolates the sign bit rather
; than flipping it the way an `xor` would -- confirm this is the intended mask.
define half @s_uitofp_i16_to_f16_neg(i16 inreg %arg0) nounwind {
; GFX7-LABEL: s_uitofp_i16_to_f16_neg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_and_b32 s4, s16, 0x8000
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, s4
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_uitofp_i16_to_f16_neg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s4, s16, 0x8000
; GFX9-NEXT: v_cvt_f16_u16_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: s_uitofp_i16_to_f16_neg:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0x8000
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_u16_e32 v0.l, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: s_uitofp_i16_to_f16_neg:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0x8000
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_u16_e32 v0, s0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %masked = and i16 %arg0, u0x8000
  %conv = uitofp i16 %masked to half
  ret half %conv
}
|
|
|