This change appears to introduce complications with full loop unrolling, as exhibited here: https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618. The result is invalid DXIL, because the unreachable branch is not cleaned up correctly. Initial investigation suggests the cause is that instructions with convergence control tokens are still used for analysis even when they are inside an unreachable branch. Reverts llvm/llvm-project#188792
192 lines
11 KiB
HLSL
192 lines
11 KiB
HLSL
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
|
|
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
|
|
|
|
// CHECK-LABEL: define hidden void @_Z10setMatrix1Ru11matrix_typeILm4ELm4EfEDv4_f(
|
|
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[M:%.*]], <4 x float> noundef nofpclass(nan inf) [[V:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <4 x float>, align 4
|
|
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <4 x float> [[V]], ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]]
|
|
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 15
|
|
// CHECK-NEXT: store float [[TMP2]], ptr [[TMP3]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 11
|
|
// CHECK-NEXT: store float [[TMP4]], ptr [[TMP5]], align 4
|
|
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 7
|
|
// CHECK-NEXT: store float [[TMP6]], ptr [[TMP7]], align 4
|
|
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 3
|
|
// CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Swizzled store into a matrix row. The reversed swizzle .abgr on row 3 sends
// V.x to column 3, V.y to column 2, V.z to column 1 and V.w to column 0.
// The FileCheck assertions above expect four scalar stores, addressed through
// a <16 x float> getelementptr at flat elements 15, 11, 7 and 3.
void setMatrix1(out float4x4 M, float4 V) {
|
|
M[3].abgr = V;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z10setMatrix2Ru11matrix_typeILm4ELm4EiEDv4_i(
|
|
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[M:%.*]], <4 x i32> noundef [[V:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <4 x i32>, align 4
|
|
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <4 x i32> [[V]], ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull [[META3]], !align [[META4]]
|
|
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2
|
|
// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
|
|
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6
|
|
// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
|
|
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 10
|
|
// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
|
|
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 14
|
|
// CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Swizzled store with the identity swizzle .rgba on row 2 of an int matrix:
// V.x..V.w land in columns 0..3 in order. The FileCheck assertions above
// expect four scalar stores, addressed through a <16 x i32> getelementptr at
// flat elements 2, 6, 10 and 14.
void setMatrix2(out int4x4 M, int4 V) {
|
|
M[2].rgba = V;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z22setMatrixVectorSwizzleRu11matrix_typeILm2ELm3EiEDv3_i(
|
|
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <3 x i32> noundef [[V:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <3 x i32>, align 4
|
|
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <3 x i32> [[V]], ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <3 x i32> <i32 2, i32 1, i32 0>
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull [[META3]], !align [[META4]]
|
|
// CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i32> [[TMP1]], i32 0
|
|
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr <6 x i32>, ptr [[TMP2]], i32 0, i32 0
|
|
// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
|
|
// CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x i32> [[TMP1]], i32 1
|
|
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr <6 x i32>, ptr [[TMP2]], i32 0, i32 2
|
|
// CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4
|
|
// CHECK-NEXT: [[TMP7:%.*]] = extractelement <3 x i32> [[TMP1]], i32 2
|
|
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr <6 x i32>, ptr [[TMP2]], i32 0, i32 4
|
|
// CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP8]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Swizzle on the right-hand side only: V.bgr reverses V before it is assigned
// to the whole of row 0. The FileCheck assertions above expect a shufflevector
// with mask <2, 1, 0>, followed by scalar stores to flat elements 0, 2 and 4
// addressed through a <6 x i32> getelementptr.
void setMatrixVectorSwizzle(out int2x3 M, int3 V) {
|
|
M[0] = V.bgr;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z24setVectorOnMatrixSwizzleRu11matrix_typeILm2ELm3EiEDv3_i(
|
|
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <3 x i32> noundef [[V:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <3 x i32>, align 4
|
|
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <3 x i32> [[V]], ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull [[META3]], !align [[META4]]
|
|
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP0]], i32 0
|
|
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <6 x i32>, ptr [[TMP1]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x i32> [[TMP0]], i32 1
|
|
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr <6 x i32>, ptr [[TMP1]], i32 0, i32 5
|
|
// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
|
|
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x i32> [[TMP0]], i32 2
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr <6 x i32>, ptr [[TMP1]], i32 0, i32 3
|
|
// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Swizzle on the left-hand side only: .rbg on row 1 sends V.x to column 0,
// V.y to column 2 and V.z to column 1. The FileCheck assertions above expect
// scalar stores to flat elements 1, 5 and 3 (in that order), addressed through
// a <6 x i32> getelementptr, with no shufflevector on V.
void setVectorOnMatrixSwizzle(out int2x3 M, int3 V) {
|
|
M[1].rbg = V;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z19setMatrixFromMatrixRu11matrix_typeILm2ELm3EiES_i(
|
|
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[MINDEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
|
|
// CHECK-NEXT: [[MINDEX_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[MINDEX]], ptr [[MINDEX_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <6 x i32>, ptr [[N_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i32> [[TMP0]], <6 x i32> poison, <3 x i32> <i32 3, i32 5, i32 1>
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull [[META3]], !align [[META4]]
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[MINDEX_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = add i32 0, [[TMP3]]
|
|
// CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x i32> [[TMP1]], i32 0
|
|
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr <6 x i32>, ptr [[TMP2]], i32 0, i32 [[TMP4]]
|
|
// CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4
|
|
// CHECK-NEXT: [[TMP7:%.*]] = add i32 2, [[TMP3]]
|
|
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x i32> [[TMP1]], i32 1
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr <6 x i32>, ptr [[TMP2]], i32 0, i32 [[TMP7]]
|
|
// CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4
|
|
// CHECK-NEXT: [[TMP10:%.*]] = add i32 4, [[TMP3]]
|
|
// CHECK-NEXT: [[TMP11:%.*]] = extractelement <3 x i32> [[TMP1]], i32 2
|
|
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr <6 x i32>, ptr [[TMP2]], i32 0, i32 [[TMP10]]
|
|
// CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP12]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Reads row 1 of N through the swizzle .gbr and stores it into the
// dynamically indexed row M[MIndex]. The FileCheck assertions above expect the
// read as one shufflevector with mask <3, 5, 1> on the loaded <6 x i32>, and
// the write as three scalar stores whose element indices are computed at run
// time as 0 + MIndex, 2 + MIndex and 4 + MIndex.
void setMatrixFromMatrix(out int2x3 M, int2x3 N, int MIndex) {
|
|
M[MIndex] = N[1].gbr;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z26setMatrixSwizzleFromMatrixRu11matrix_typeILm2ELm3EiES_i(
|
|
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[NINDEX:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
|
|
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
|
|
// CHECK-NEXT: [[NINDEX_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4
|
|
// CHECK-NEXT: store i32 [[NINDEX]], ptr [[NINDEX_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[NINDEX_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[N_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP0]]
|
|
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <6 x i32> [[TMP1]], i32 [[TMP2]]
|
|
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <3 x i32> poison, i32 [[MATRIX_ELEM]], i32 0
|
|
// CHECK-NEXT: [[TMP3:%.*]] = add i32 2, [[TMP0]]
|
|
// CHECK-NEXT: [[MATRIX_ELEM1:%.*]] = extractelement <6 x i32> [[TMP1]], i32 [[TMP3]]
|
|
// CHECK-NEXT: [[MATRIX_ROW_INS2:%.*]] = insertelement <3 x i32> [[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
|
|
// CHECK-NEXT: [[TMP4:%.*]] = add i32 4, [[TMP0]]
|
|
// CHECK-NEXT: [[MATRIX_ELEM3:%.*]] = extractelement <6 x i32> [[TMP1]], i32 [[TMP4]]
|
|
// CHECK-NEXT: [[MATRIX_ROW_INS4:%.*]] = insertelement <3 x i32> [[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
|
|
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull [[META3]], !align [[META4]]
|
|
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x i32> [[MATRIX_ROW_INS4]], i32 0
|
|
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr <6 x i32>, ptr [[TMP5]], i32 0, i32 5
|
|
// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
|
|
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x i32> [[MATRIX_ROW_INS4]], i32 1
|
|
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr <6 x i32>, ptr [[TMP5]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4
|
|
// CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x i32> [[MATRIX_ROW_INS4]], i32 2
|
|
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr <6 x i32>, ptr [[TMP5]], i32 0, i32 3
|
|
// CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP11]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Reads the dynamically indexed row N[NIndex] and stores it into row 1 of M
// through the swizzle .brg. The FileCheck assertions above expect the row to
// be gathered with extractelement/insertelement pairs at indices 0 + NIndex,
// 2 + NIndex and 4 + NIndex, then written with scalar stores to flat elements
// 5, 1 and 3 (in that order) through a <6 x i32> getelementptr.
void setMatrixSwizzleFromMatrix(out int2x3 M, int2x3 N, int NIndex) {
|
|
M[1].brg = N[NIndex];
|
|
}
|
|
|
|
|
|
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z2fnu11matrix_typeILm4ELm4EfE(
|
|
// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
|
|
// CHECK-NEXT: store <16 x float> [[M]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <2 x i32> <i32 0, i32 4>
|
|
// CHECK-NEXT: ret <2 x float> [[TMP1]]
|
|
//
|
|
// Swizzled read of a matrix row used as an rvalue: returns the first two
// components (.xy) of row 0. The FileCheck assertions above expect a single
// shufflevector with mask <0, 4> on the loaded <16 x float>.
float2 fn(float4x4 M) {
|
|
return M[0].xy;
|
|
}
|
|
|
|
//.
|
|
// CHECK: [[META3]] = !{}
|
|
// CHECK: [[META4]] = !{i64 4}
|
|
//.
|