Files
llvm-project/llvm/test/CodeGen/DirectX/WavePrefixSum.ll
Kai 751a546fa9 [HLSL][DXIL][SPIRV] WavePrefixSum intrinsic support (#167946)
Issue: https://github.com/llvm/llvm-project/issues/99172
- [x] Implement `WavePrefixSum` clang builtin
- [x] Link `WavePrefixSum` clang builtin with `hlsl_intrinsics.h`
- [x] Add sema checks for `WavePrefixSum` to
`CheckHLSLBuiltinFunctionCall` in `SemaChecking.cpp`
- [x] Add codegen for `WavePrefixSum` to `EmitHLSLBuiltinExpr` in
`CGBuiltin.cpp`
- [x] Add codegen tests to
`clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl`
- [x] Add sema tests to
`clang/test/SemaHLSL/BuiltIns/WavePrefixSum-errors.hlsl`
- [x] Create the `int_dx_WavePrefixSum` intrinsic in
`IntrinsicsDirectX.td`
- [x] Create the `DXILOpMapping` of `int_dx_WavePrefixSum` to `121` in
`DXIL.td`
- [x] Create the `WavePrefixSum.ll` and `WavePrefixSum_errors.ll` tests
in `llvm/test/CodeGen/DirectX/`
- [x] Create the `int_spv_WavePrefixSum` intrinsic in
`IntrinsicsSPIRV.td`
- [x] In SPIRVInstructionSelector.cpp create the `WavePrefixSum`
lowering and map it to `int_spv_WavePrefixSum` in
`SPIRVInstructionSelector::selectIntrinsic`.
- [x] Create SPIR-V backend test case in
`llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WavePrefixSum.ll`

I also added a new macro
`GENERATE_HLSL_INTRINSIC_FUNCTION_SELECT_UNSIGNED` in conjunction with
the new function `getUnsignedIntrinsicVariant` to make selecting
unsigned variants of the intrinsic easier. As a result, I was able to
replace `getWaveActiveSumIntrinsic`, `getWaveActiveMaxIntrinsic`, and
`getWaveActiveMinIntrinsic` using the new macro.
2026-02-03 03:00:45 -05:00

144 lines
5.5 KiB
LLVM

; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
; Test that for scalar values, WavePrefixSum maps down to the DirectX op
; Scalar half input: the intrinsic call is expected to be rewritten into one
; dx.op.wavePrefixOp.f16 call carrying opcode 121 and a trailing `i8 0`.
define noundef half @wave_prefix_sum_half(half noundef %expr) {
; CHECK: call half @dx.op.wavePrefixOp.f16(i32 121, half %expr, i8 0, i8 0)
  %prefix = call half @llvm.dx.wave.prefix.sum.f16(half %expr)
  ret half %prefix
}
; Scalar float input: the intrinsic call is expected to be rewritten into one
; dx.op.wavePrefixOp.f32 call carrying opcode 121 and a trailing `i8 0`.
define noundef float @wave_prefix_sum_float(float noundef %expr) {
; CHECK: call float @dx.op.wavePrefixOp.f32(i32 121, float %expr, i8 0, i8 0)
  %prefix = call float @llvm.dx.wave.prefix.sum.f32(float %expr)
  ret float %prefix
}
; Scalar double input: the intrinsic call is expected to be rewritten into one
; dx.op.wavePrefixOp.f64 call carrying opcode 121 and a trailing `i8 0`.
define noundef double @wave_prefix_sum_double(double noundef %expr) {
; CHECK: call double @dx.op.wavePrefixOp.f64(i32 121, double %expr, i8 0, i8 0)
  %prefix = call double @llvm.dx.wave.prefix.sum.f64(double %expr)
  ret double %prefix
}
; Scalar i16 input through the `sum` intrinsic: expected to become one
; dx.op.wavePrefixOp.i16 call with opcode 121 and a trailing `i8 0`.
define noundef i16 @wave_prefix_sum_i16(i16 noundef %expr) {
; CHECK: call i16 @dx.op.wavePrefixOp.i16(i32 121, i16 %expr, i8 0, i8 0)
  %prefix = call i16 @llvm.dx.wave.prefix.sum.i16(i16 %expr)
  ret i16 %prefix
}
; Scalar i32 input through the `sum` intrinsic: expected to become one
; dx.op.wavePrefixOp.i32 call with opcode 121 and a trailing `i8 0`.
define noundef i32 @wave_prefix_sum_i32(i32 noundef %expr) {
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr, i8 0, i8 0)
  %prefix = call i32 @llvm.dx.wave.prefix.sum.i32(i32 %expr)
  ret i32 %prefix
}
; Scalar i64 input through the `sum` intrinsic: expected to become one
; dx.op.wavePrefixOp.i64 call with opcode 121 and a trailing `i8 0`.
define noundef i64 @wave_prefix_sum_i64(i64 noundef %expr) {
; CHECK: call i64 @dx.op.wavePrefixOp.i64(i32 121, i64 %expr, i8 0, i8 0)
  %prefix = call i64 @llvm.dx.wave.prefix.sum.i64(i64 %expr)
  ret i64 %prefix
}
; Scalar i16 input through the `usum` intrinsic: expected to become one
; dx.op.wavePrefixOp.i16 call with opcode 121 whose trailing operand is `i8 1`
; (the `sum` variant of the same width is checked with `i8 0`).
define noundef i16 @wave_prefix_usum_i16(i16 noundef %expr) {
; CHECK: call i16 @dx.op.wavePrefixOp.i16(i32 121, i16 %expr, i8 0, i8 1)
  %uprefix = call i16 @llvm.dx.wave.prefix.usum.i16(i16 %expr)
  ret i16 %uprefix
}
; Scalar i32 input through the `usum` intrinsic: expected to become one
; dx.op.wavePrefixOp.i32 call with opcode 121 whose trailing operand is `i8 1`
; (the `sum` variant of the same width is checked with `i8 0`).
define noundef i32 @wave_prefix_usum_i32(i32 noundef %expr) {
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr, i8 0, i8 1)
  %uprefix = call i32 @llvm.dx.wave.prefix.usum.i32(i32 %expr)
  ret i32 %uprefix
}
; Scalar i64 input through the `usum` intrinsic: expected to become one
; dx.op.wavePrefixOp.i64 call with opcode 121 whose trailing operand is `i8 1`
; (the `sum` variant of the same width is checked with `i8 0`).
define noundef i64 @wave_prefix_usum_i64(i64 noundef %expr) {
; CHECK: call i64 @dx.op.wavePrefixOp.i64(i32 121, i64 %expr, i8 0, i8 1)
  %uprefix = call i64 @llvm.dx.wave.prefix.usum.i64(i64 %expr)
  ret i64 %uprefix
}
; Scalar overloads of the prefix-sum intrinsics called by the functions above.

; `usum` overloads (integer types only).
declare i16 @llvm.dx.wave.prefix.usum.i16(i16)
declare i32 @llvm.dx.wave.prefix.usum.i32(i32)
declare i64 @llvm.dx.wave.prefix.usum.i64(i64)

; `sum` overloads on integer types.
declare i16 @llvm.dx.wave.prefix.sum.i16(i16)
declare i32 @llvm.dx.wave.prefix.sum.i32(i32)
declare i64 @llvm.dx.wave.prefix.sum.i64(i64)

; `sum` overloads on floating-point types.
declare half @llvm.dx.wave.prefix.sum.f16(half)
declare float @llvm.dx.wave.prefix.sum.f32(float)
declare double @llvm.dx.wave.prefix.sum.f64(double)
; Test that for vector values, WavePrefixSum scalarizes and maps down to the
; DirectX op
; Two-lane half vector: each lane (%expr.i0, %expr.i1) is expected to get its
; own dx.op.wavePrefixOp.f16 call with opcode 121 and a trailing `i8 0`.
define noundef <2 x half> @wave_prefix_sum_v2half(<2 x half> noundef %expr) {
; CHECK: call half @dx.op.wavePrefixOp.f16(i32 121, half %expr.i0, i8 0, i8 0)
; CHECK: call half @dx.op.wavePrefixOp.f16(i32 121, half %expr.i1, i8 0, i8 0)
  %prefix = call <2 x half> @llvm.dx.wave.prefix.sum.v2f16(<2 x half> %expr)
  ret <2 x half> %prefix
}
; Three-lane i32 vector through the `sum` intrinsic: each lane (%expr.i0 ..
; %expr.i2) is expected to get its own dx.op.wavePrefixOp.i32 call with opcode
; 121 and a trailing `i8 0`.
define noundef <3 x i32> @wave_prefix_sum_v3i32(<3 x i32> noundef %expr) {
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr.i0, i8 0, i8 0)
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr.i1, i8 0, i8 0)
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr.i2, i8 0, i8 0)
  %prefix = call <3 x i32> @llvm.dx.wave.prefix.sum.v3i32(<3 x i32> %expr)
  ret <3 x i32> %prefix
}
; Four-lane double vector: each lane (%expr.i0 .. %expr.i3) is expected to get
; its own dx.op.wavePrefixOp.f64 call with opcode 121 and a trailing `i8 0`.
define noundef <4 x double> @wave_prefix_sum_v4f64(<4 x double> noundef %expr) {
entry:
; CHECK: call double @dx.op.wavePrefixOp.f64(i32 121, double %expr.i0, i8 0, i8 0)
; CHECK: call double @dx.op.wavePrefixOp.f64(i32 121, double %expr.i1, i8 0, i8 0)
; CHECK: call double @dx.op.wavePrefixOp.f64(i32 121, double %expr.i2, i8 0, i8 0)
; CHECK: call double @dx.op.wavePrefixOp.f64(i32 121, double %expr.i3, i8 0, i8 0)
  ; The overload suffix is v4f64 (was the typo `v464`) so that this call refers
  ; to the intrinsic declared in this file for <4 x double>.
  %ret = call <4 x double> @llvm.dx.wave.prefix.sum.v4f64(<4 x double> %expr)
  ret <4 x double> %ret
}
; Vector overloads of the `sum` intrinsic; the name suffix encodes the lane
; count and element type of the operand.
declare <4 x double> @llvm.dx.wave.prefix.sum.v4f64(<4 x double>)
declare <3 x i32> @llvm.dx.wave.prefix.sum.v3i32(<3 x i32>)
declare <2 x half> @llvm.dx.wave.prefix.sum.v2f16(<2 x half>)
; Vector inputs through the `usum` intrinsic. Each lane is expected to get its
; own scalar dx.op.wavePrefixOp call with opcode 121 whose trailing operand is
; `i8 1`. The intrinsic overload suffixes below are spelled to match the actual
; operand types (v2i16, v3i32, v4i64) so every call refers to the declaration
; at the end of this group.

; <2 x i16>: two lanes, two i16 calls.
define noundef <2 x i16> @wave_prefix_usum_v2i16(<2 x i16> noundef %expr) {
entry:
; CHECK: call i16 @dx.op.wavePrefixOp.i16(i32 121, i16 %expr.i0, i8 0, i8 1)
; CHECK: call i16 @dx.op.wavePrefixOp.i16(i32 121, i16 %expr.i1, i8 0, i8 1)
  %ret = call <2 x i16> @llvm.dx.wave.prefix.usum.v2i16(<2 x i16> %expr)
  ret <2 x i16> %ret
}

; <3 x i32>: three lanes, three i32 calls.
define noundef <3 x i32> @wave_prefix_usum_v3i32(<3 x i32> noundef %expr) {
entry:
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr.i0, i8 0, i8 1)
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr.i1, i8 0, i8 1)
; CHECK: call i32 @dx.op.wavePrefixOp.i32(i32 121, i32 %expr.i2, i8 0, i8 1)
  %ret = call <3 x i32> @llvm.dx.wave.prefix.usum.v3i32(<3 x i32> %expr)
  ret <3 x i32> %ret
}

; <4 x i64>: four lanes, four i64 calls. The function keeps its existing name
; even though the element type is i64.
define noundef <4 x i64> @wave_prefix_usum_v4f64(<4 x i64> noundef %expr) {
entry:
; CHECK: call i64 @dx.op.wavePrefixOp.i64(i32 121, i64 %expr.i0, i8 0, i8 1)
; CHECK: call i64 @dx.op.wavePrefixOp.i64(i32 121, i64 %expr.i1, i8 0, i8 1)
; CHECK: call i64 @dx.op.wavePrefixOp.i64(i32 121, i64 %expr.i2, i8 0, i8 1)
; CHECK: call i64 @dx.op.wavePrefixOp.i64(i32 121, i64 %expr.i3, i8 0, i8 1)
  %ret = call <4 x i64> @llvm.dx.wave.prefix.usum.v4i64(<4 x i64> %expr)
  ret <4 x i64> %ret
}

; Vector overloads of the `usum` intrinsic used by the three functions above.
declare <2 x i16> @llvm.dx.wave.prefix.usum.v2i16(<2 x i16>)
declare <3 x i32> @llvm.dx.wave.prefix.usum.v3i32(<3 x i32>)
declare <4 x i64> @llvm.dx.wave.prefix.usum.v4i64(<4 x i64>)