Fixes #185518. The SPIR-V backend does not handle the lowering of `shufflevector` instructions on vectors with more than 4 elements. This PR changes the codegen of matrix initializer lists to directly emit vectors with elements in column-major order when the default matrix memory layout is column-major, rather than emitting them in linear/row-major order followed by a vector shuffle. One alternative fix would be to change the default depth of [`canEvaluateShuffled`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp#L1865-L1866) to 16 in `InstCombineVectorOps.cpp`, which would eliminate the vector shuffle for vectors of up to 16 elements (enough to handle 4x4 matrices). However, that change would have an impact beyond HLSL, which does not seem necessary for the scope of this issue (which concerns only matrix initializer list codegen). Another alternative fix would be to extend the `shufflevector` lowering in the SPIR-V backend to support vectors of more than 4 elements. However, this also goes beyond the scope of matrix initializer list codegen, which is so far the only case in which a shuffle of a vector with more than 4 elements has appeared. Assisted-by: claude-opus-4.6
61 lines
3.5 KiB
HLSL
61 lines
3.5 KiB
HLSL
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -disable-llvm-passes \
|
|
// RUN: -emit-llvm -finclude-default-header -o - %s | FileCheck %s --check-prefix=CHECK,COL-CHECK
|
|
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -disable-llvm-passes \
|
|
// RUN: -emit-llvm -finclude-default-header -fmatrix-memory-layout=row-major -o - %s \
|
|
// RUN: | FileCheck %s --check-prefix=CHECK,ROW-CHECK
|
|
|
|
// Verify that matrix initializer lists store elements in the correct memory
|
|
// layout. The initializer list {1,2,3,4,5,6} for a float2x3 (2 rows, 3 cols)
|
|
// is in row-major order: row0=[1,2,3], row1=[4,5,6].
|
|
//
|
|
// With column-major (default) memory layout, the stored vector should be
|
|
// reordered to: col0=[1,4], col1=[2,5], col2=[3,6] = <1,4,2,5,3,6>.
|
|
//
|
|
// With row-major memory layout, the stored vector stays as-is: <1,2,3,4,5,6>.
|
|
|
|
// Builds a float2x3 from six constants and returns M[0][2]. The directives
// below pin both the element order of the stored <6 x float> and the flat
// index used by the element read, once per memory-layout run.
export float test_row0_col2() {
|
|
// CHECK-LABEL: define {{.*}} float @_Z14test_row0_col2v
|
|
// COL-CHECK: store <6 x float> <float 1.000000e+00, float 4.000000e+00, float 2.000000e+00, float 5.000000e+00, float 3.000000e+00, float 6.000000e+00>
|
|
// COL-CHECK: extractelement <6 x float> %{{.*}}, i32 4
|
|
// ROW-CHECK: store <6 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00>
|
|
// ROW-CHECK: extractelement <6 x float> %{{.*}}, i32 2
|
|
float2x3 M = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
|
|
// Row 0, Col 2 is the 3rd initializer = 3.0. In the expected stores above,
// 3.0 sits at position 4 of the column-major vector and at position 2 of the
// row-major vector, which is why the two runs expect different read indices.
|
|
return M[0][2];
|
|
}
|
|
|
|
// Builds the same constant float2x3 and returns M[1][0]. The directives below
// pin the stored element order and the flat index of the element read for
// each memory-layout run.
export float test_row1_col0() {
|
|
// CHECK-LABEL: define {{.*}} float @_Z14test_row1_col0v
|
|
// COL-CHECK: store <6 x float> <float 1.000000e+00, float 4.000000e+00, float 2.000000e+00, float 5.000000e+00, float 3.000000e+00, float 6.000000e+00>
|
|
// COL-CHECK: extractelement <6 x float> %{{.*}}, i32 1
|
|
// ROW-CHECK: store <6 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00>
|
|
// ROW-CHECK: extractelement <6 x float> %{{.*}}, i32 3
|
|
float2x3 M = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
|
|
// Row 1, Col 0 is the 4th initializer = 4.0. In the expected stores above,
// 4.0 sits at position 1 of the column-major vector and at position 3 of the
// row-major vector, which is why the two runs expect different read indices.
|
|
return M[1][0];
|
|
}
|
|
|
|
// Verify that elements are inserted at the correct positions according to the selected matrix memory layout.
|
|
|
|
// Builds a float2x3 from six runtime arguments, so the matrix is assembled
// with one insertelement per argument instead of a constant store.
// The arguments are listed in row-major order (a, b, c = row 0; d, e, f =
// row 1). Expected insertion indices, in argument order:
//   column-major run: 0, 2, 4, 1, 3, 5
//   row-major run:    0, 1, 2, 3, 4, 5
// The first and last arguments land at the same index (0 and 5) in both
// layouts, so those two inserts are matched with the shared prefix.
export float2x3 test_dynamic(float a, float b, float c,
|
|
float d, float e, float f) {
|
|
// CHECK-LABEL: define {{.*}} <6 x float> @_Z12test_dynamicffffff
|
|
// CHECK: [[A:%.*]] = load float, ptr %a.addr
|
|
// CHECK: [[VECINIT0:%.*]] = insertelement <6 x float> poison, float [[A]], i32 0
|
|
// CHECK: [[B:%.*]] = load float, ptr %b.addr
|
|
// COL-CHECK: [[VECINIT1:%.*]] = insertelement <6 x float> [[VECINIT0]], float [[B]], i32 2
|
|
// ROW-CHECK: [[VECINIT1:%.*]] = insertelement <6 x float> [[VECINIT0]], float [[B]], i32 1
|
|
// CHECK: [[C:%.*]] = load float, ptr %c.addr
|
|
// COL-CHECK: [[VECINIT2:%.*]] = insertelement <6 x float> [[VECINIT1]], float [[C]], i32 4
|
|
// ROW-CHECK: [[VECINIT2:%.*]] = insertelement <6 x float> [[VECINIT1]], float [[C]], i32 2
|
|
// CHECK: [[D:%.*]] = load float, ptr %d.addr
|
|
// COL-CHECK: [[VECINIT3:%.*]] = insertelement <6 x float> [[VECINIT2]], float [[D]], i32 1
|
|
// ROW-CHECK: [[VECINIT3:%.*]] = insertelement <6 x float> [[VECINIT2]], float [[D]], i32 3
|
|
// CHECK: [[E:%.*]] = load float, ptr %e.addr
|
|
// COL-CHECK: [[VECINIT4:%.*]] = insertelement <6 x float> [[VECINIT3]], float [[E]], i32 3
|
|
// ROW-CHECK: [[VECINIT4:%.*]] = insertelement <6 x float> [[VECINIT3]], float [[E]], i32 4
|
|
// CHECK: [[F:%.*]] = load float, ptr %f.addr
|
|
// CHECK: [[VECINIT5:%.*]] = insertelement <6 x float> [[VECINIT4]], float [[F]], i32 5
|
|
return (float2x3){a, b, c, d, e, f};
|
|
}
|