Files
llvm-project/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
apple-fcloutier d28a8b0b3f [CodeGen] Change -O0 bool load codegen to have nonzero model (#193783)
The main follow-up item to
https://github.com/llvm/llvm-project/pull/160790 was changing -O0
codegen to convert in-memory i8 bool values to i1 with the `nonzero`
rule (`icmp ne i8 %val, 0`) rather than the `truncate` rule (`trunc i8
%val to i1`).

Bool values can only be `true` or `false`. While they are notionally a
single bit, the smallest addressable unit is CHAR_BIT bits large, and
CHAR_BIT is typically 8. Programming errors (such as memcpying a random
byte to a `bool`) can cause the 8-bit storage for a `bool` value to have
a bit pattern that is different from `true` or `false`, which then leads
to undefined behavior.

Clang has historically taken advantage of this in optimized builds
(everything other than -O0) by attaching range metadata to `bool` loads
to assume that the value loaded can only be 0 or 1. This leads to
exploitable security issues, and the correct behavior is not always easy
to explain to C developers. To remedy this situation, Clang accepted a
[-fstrict-bool](https://discourse.llvm.org/t/defining-what-happens-when-a-bool-isn-t-0-or-1/86778)
switch to control whether it can assume that loaded bool values are
always 0 or 1. By default, it does (maintaining the status
quo), and users must specify `-fno-strict-bool` to opt out of that
behavior.

When opting out, users can optionally request that bool i8 values are
converted to i1 either by truncation or by comparing to 0. The default
is comparing to 0. However, since `-O0` alone _technically_ uses
-fstrict-bool, unoptimized builds convert i8 bool values to i1 with a
`trunc` operation, whereas `-O1 -fno-strict-bool` converts i8 bool
values to i1 with `icmp ne 0`. This is a surprising inconsistency.

This PR changes -O0 codegen to align with -fno-strict-bool. This is
achieved with a single-line change:

```
   bool isConvertingBoolWithCmp0() const {
     switch (getLoadBoolFromMem()) {
     case BoolFromMem::Strict:
+      return !isOptimizedBuild();
     case BoolFromMem::Truncate:
```

However, it impacts a _very large_ number of tests, so we agreed to move
it out of the -fstrict-bool PR to reduce the chances we would have to
back out the whole thing for this secondary item.

This PR does the change and modifies the tests accordingly. I expect
that it will go stale rather quickly. If this needs more discussion,
I'll only update it once we reach consensus.
2026-04-27 17:07:20 -04:00

271 lines
19 KiB
HLSL

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z24getFloatVecMatrixDynamicu11matrix_typeILm4ELm4EfEi(
// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store <16 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <16 x float> [[TMP1]], i32 [[TMP2]]
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <4 x float> poison, float [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[TMP3:%.*]] = add i32 4, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM1:%.*]] = extractelement <16 x float> [[TMP1]], i32 [[TMP3]]
// CHECK-NEXT: [[MATRIX_ROW_INS2:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS]], float [[MATRIX_ELEM1]], i32 1
// CHECK-NEXT: [[TMP4:%.*]] = add i32 8, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM3:%.*]] = extractelement <16 x float> [[TMP1]], i32 [[TMP4]]
// CHECK-NEXT: [[MATRIX_ROW_INS4:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS2]], float [[MATRIX_ELEM3]], i32 2
// CHECK-NEXT: [[TMP5:%.*]] = add i32 12, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM5:%.*]] = extractelement <16 x float> [[TMP1]], i32 [[TMP5]]
// CHECK-NEXT: [[MATRIX_ROW_INS6:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS4]], float [[MATRIX_ELEM5]], i32 3
// CHECK-NEXT: ret <4 x float> [[MATRIX_ROW_INS6]]
//
// Single-subscript read of a float4x4 with a runtime index, yielding a float4.
// The expected IR above loads the matrix as one 16-element vector and gathers
// the elements at flat offsets index+0, index+4, index+8 and index+12 into the
// four lanes of the result.
float4 getFloatVecMatrixDynamic(float4x4 M, int index) {
return M[index];
}
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z27getFloatScalarMatrixDynamicu11matrix_typeILm2ELm1EfEi(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP1]], i32 [[TMP2]]
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <1 x float> poison, float [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[MATRIX_ROW_INS]], i32 0
// CHECK-NEXT: ret float [[CAST_VTRUNC]]
//
// Single-subscript read of a float2x1 with a runtime index, returned as a
// scalar float. The expected IR above first builds a one-element vector from
// the element at flat offset index+0, then extracts lane 0 of that vector to
// produce the scalar return value.
float getFloatScalarMatrixDynamic(float2x1 M, int index) {
return M[index];
}
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z28getFloatScalarMatrixConstantu11matrix_typeILm2ELm1EfE(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
// CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <1 x float> poison, float [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[MATRIX_ROW_INS]], i32 0
// CHECK-NEXT: ret float [[CAST_VTRUNC]]
//
// Single-subscript read of a float2x1 with the constant index 0, returned as a
// scalar float. With a constant index the expected IR above uses the literal
// element offset 0 directly (no index arithmetic), then extracts lane 0 of the
// one-element intermediate vector.
float getFloatScalarMatrixConstant(float2x1 M) {
return M[0];
}
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z29getFloatScalarMatrixConstant2u11matrix_typeILm2ELm1EfE(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
// CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <1 x float> poison, float [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[MATRIX_ROW_INS]], i32 0
// CHECK-NEXT: ret float [[CAST_VTRUNC]]
//
// Same as getFloatScalarMatrixConstant but with the constant index 1: the
// expected IR above extracts element 1 of the two-element matrix vector
// before narrowing the one-element intermediate vector to a scalar.
float getFloatScalarMatrixConstant2(float2x1 M) {
return M[1];
}
// CHECK-LABEL: define hidden noundef <4 x i32> @_Z19getIntMatrixDynamicu11matrix_typeILm4ELm4EiEi(
// CHECK-SAME: <16 x i32> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store <16 x i32> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <16 x i32> [[TMP1]], i32 [[TMP2]]
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <4 x i32> poison, i32 [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[TMP3:%.*]] = add i32 4, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM1:%.*]] = extractelement <16 x i32> [[TMP1]], i32 [[TMP3]]
// CHECK-NEXT: [[MATRIX_ROW_INS2:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
// CHECK-NEXT: [[TMP4:%.*]] = add i32 8, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM3:%.*]] = extractelement <16 x i32> [[TMP1]], i32 [[TMP4]]
// CHECK-NEXT: [[MATRIX_ROW_INS4:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
// CHECK-NEXT: [[TMP5:%.*]] = add i32 12, [[TMP0]]
// CHECK-NEXT: [[MATRIX_ELEM5:%.*]] = extractelement <16 x i32> [[TMP1]], i32 [[TMP5]]
// CHECK-NEXT: [[MATRIX_ROW_INS6:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS4]], i32 [[MATRIX_ELEM5]], i32 3
// CHECK-NEXT: ret <4 x i32> [[MATRIX_ROW_INS6]]
//
// Integer counterpart of getFloatVecMatrixDynamic: single-subscript read of an
// int4x4 with a runtime index, yielding an int4. The expected IR above gathers
// the elements at flat offsets index+0, index+4, index+8 and index+12 of the
// 16-element i32 vector.
int4 getIntMatrixDynamic(int4x4 M, int index) {
return M[index];
}
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z22AddFloatMatrixConstantu11matrix_typeILm4ELm4EfE(
// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x float>], align 4
// CHECK-NEXT: store <16 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <16 x float> [[TMP0]], i32 0
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <4 x float> poison, float [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM1:%.*]] = extractelement <16 x float> [[TMP0]], i32 4
// CHECK-NEXT: [[MATRIX_ROW_INS2:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS]], float [[MATRIX_ELEM1]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM3:%.*]] = extractelement <16 x float> [[TMP0]], i32 8
// CHECK-NEXT: [[MATRIX_ROW_INS4:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS2]], float [[MATRIX_ELEM3]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM5:%.*]] = extractelement <16 x float> [[TMP0]], i32 12
// CHECK-NEXT: [[MATRIX_ROW_INS6:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS4]], float [[MATRIX_ELEM5]], i32 3
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM7:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
// CHECK-NEXT: [[MATRIX_ROW_INS8:%.*]] = insertelement <4 x float> poison, float [[MATRIX_ELEM7]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM9:%.*]] = extractelement <16 x float> [[TMP1]], i32 5
// CHECK-NEXT: [[MATRIX_ROW_INS10:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS8]], float [[MATRIX_ELEM9]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9
// CHECK-NEXT: [[MATRIX_ROW_INS12:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS10]], float [[MATRIX_ELEM11]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM13:%.*]] = extractelement <16 x float> [[TMP1]], i32 13
// CHECK-NEXT: [[MATRIX_ROW_INS14:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS12]], float [[MATRIX_ELEM13]], i32 3
// CHECK-NEXT: [[ADD:%.*]] = fadd reassoc nnan ninf nsz arcp afn <4 x float> [[MATRIX_ROW_INS6]], [[MATRIX_ROW_INS14]]
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM15:%.*]] = extractelement <16 x float> [[TMP2]], i32 2
// CHECK-NEXT: [[MATRIX_ROW_INS16:%.*]] = insertelement <4 x float> poison, float [[MATRIX_ELEM15]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM17:%.*]] = extractelement <16 x float> [[TMP2]], i32 6
// CHECK-NEXT: [[MATRIX_ROW_INS18:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS16]], float [[MATRIX_ELEM17]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM19:%.*]] = extractelement <16 x float> [[TMP2]], i32 10
// CHECK-NEXT: [[MATRIX_ROW_INS20:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS18]], float [[MATRIX_ELEM19]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM21:%.*]] = extractelement <16 x float> [[TMP2]], i32 14
// CHECK-NEXT: [[MATRIX_ROW_INS22:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS20]], float [[MATRIX_ELEM21]], i32 3
// CHECK-NEXT: [[ADD23:%.*]] = fadd reassoc nnan ninf nsz arcp afn <4 x float> [[ADD]], [[MATRIX_ROW_INS22]]
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM24:%.*]] = extractelement <16 x float> [[TMP3]], i32 3
// CHECK-NEXT: [[MATRIX_ROW_INS25:%.*]] = insertelement <4 x float> poison, float [[MATRIX_ELEM24]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM26:%.*]] = extractelement <16 x float> [[TMP3]], i32 7
// CHECK-NEXT: [[MATRIX_ROW_INS27:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS25]], float [[MATRIX_ELEM26]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM28:%.*]] = extractelement <16 x float> [[TMP3]], i32 11
// CHECK-NEXT: [[MATRIX_ROW_INS29:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS27]], float [[MATRIX_ELEM28]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM30:%.*]] = extractelement <16 x float> [[TMP3]], i32 15
// CHECK-NEXT: [[MATRIX_ROW_INS31:%.*]] = insertelement <4 x float> [[MATRIX_ROW_INS29]], float [[MATRIX_ELEM30]], i32 3
// CHECK-NEXT: [[ADD32:%.*]] = fadd reassoc nnan ninf nsz arcp afn <4 x float> [[ADD23]], [[MATRIX_ROW_INS31]]
// CHECK-NEXT: ret <4 x float> [[ADD32]]
//
// Sums the four single-subscript reads M[0]..M[3] of a float4x4.
// In the expected IR above each subscript reloads the matrix and gathers the
// elements at constant offsets r, r+4, r+8 and r+12 (r = 0..3); the partial
// results are combined left to right with three vector fadd instructions.
float4 AddFloatMatrixConstant(float4x4 M) {
return M[0] + M[1] + M[2] + M[3];
}
// CHECK-LABEL: define hidden noundef <4 x i32> @_Z20AddIntMatrixConstantu11matrix_typeILm4ELm4EiE(
// CHECK-SAME: <16 x i32> noundef [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <16 x i32> [[TMP0]], i32 0
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <4 x i32> poison, i32 [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 4
// CHECK-NEXT: [[MATRIX_ROW_INS2:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM3:%.*]] = extractelement <16 x i32> [[TMP0]], i32 8
// CHECK-NEXT: [[MATRIX_ROW_INS4:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM5:%.*]] = extractelement <16 x i32> [[TMP0]], i32 12
// CHECK-NEXT: [[MATRIX_ROW_INS6:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS4]], i32 [[MATRIX_ELEM5]], i32 3
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM7:%.*]] = extractelement <16 x i32> [[TMP1]], i32 1
// CHECK-NEXT: [[MATRIX_ROW_INS8:%.*]] = insertelement <4 x i32> poison, i32 [[MATRIX_ELEM7]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM9:%.*]] = extractelement <16 x i32> [[TMP1]], i32 5
// CHECK-NEXT: [[MATRIX_ROW_INS10:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS8]], i32 [[MATRIX_ELEM9]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM11:%.*]] = extractelement <16 x i32> [[TMP1]], i32 9
// CHECK-NEXT: [[MATRIX_ROW_INS12:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS10]], i32 [[MATRIX_ELEM11]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM13:%.*]] = extractelement <16 x i32> [[TMP1]], i32 13
// CHECK-NEXT: [[MATRIX_ROW_INS14:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS12]], i32 [[MATRIX_ELEM13]], i32 3
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[MATRIX_ROW_INS6]], [[MATRIX_ROW_INS14]]
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM15:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2
// CHECK-NEXT: [[MATRIX_ROW_INS16:%.*]] = insertelement <4 x i32> poison, i32 [[MATRIX_ELEM15]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM17:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6
// CHECK-NEXT: [[MATRIX_ROW_INS18:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS16]], i32 [[MATRIX_ELEM17]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10
// CHECK-NEXT: [[MATRIX_ROW_INS20:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS18]], i32 [[MATRIX_ELEM19]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM21:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14
// CHECK-NEXT: [[MATRIX_ROW_INS22:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS20]], i32 [[MATRIX_ELEM21]], i32 3
// CHECK-NEXT: [[ADD23:%.*]] = add <4 x i32> [[ADD]], [[MATRIX_ROW_INS22]]
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM24:%.*]] = extractelement <16 x i32> [[TMP3]], i32 3
// CHECK-NEXT: [[MATRIX_ROW_INS25:%.*]] = insertelement <4 x i32> poison, i32 [[MATRIX_ELEM24]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM26:%.*]] = extractelement <16 x i32> [[TMP3]], i32 7
// CHECK-NEXT: [[MATRIX_ROW_INS27:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS25]], i32 [[MATRIX_ELEM26]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM28:%.*]] = extractelement <16 x i32> [[TMP3]], i32 11
// CHECK-NEXT: [[MATRIX_ROW_INS29:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS27]], i32 [[MATRIX_ELEM28]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM30:%.*]] = extractelement <16 x i32> [[TMP3]], i32 15
// CHECK-NEXT: [[MATRIX_ROW_INS31:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS29]], i32 [[MATRIX_ELEM30]], i32 3
// CHECK-NEXT: [[ADD32:%.*]] = add <4 x i32> [[ADD23]], [[MATRIX_ROW_INS31]]
// CHECK-NEXT: ret <4 x i32> [[ADD32]]
//
// Integer counterpart of AddFloatMatrixConstant: sums the four single-subscript
// reads M[0]..M[3] of an int4x4. The expected IR above reloads the matrix for
// each subscript, gathers the elements at constant offsets r, r+4, r+8 and r+12
// (r = 0..3), and combines them left to right with three vector add
// instructions.
int4 AddIntMatrixConstant(int4x4 M) {
return M[0] + M[1] + M[2] + M[3];
}
// CHECK-LABEL: define hidden noundef <3 x i1> @_Z23getBoolVecMatrixDynamicu11matrix_typeILm2ELm3EbEi(
// CHECK-SAME: <6 x i1> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP0:%.*]] = zext <6 x i1> [[M]] to <6 x i32>
// CHECK-NEXT: store <6 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load <6 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = add i32 0, [[TMP1]]
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <6 x i32> [[TMP2]], i32 [[TMP3]]
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <3 x i32> poison, i32 [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[TMP4:%.*]] = add i32 2, [[TMP1]]
// CHECK-NEXT: [[MATRIX_ELEM1:%.*]] = extractelement <6 x i32> [[TMP2]], i32 [[TMP4]]
// CHECK-NEXT: [[MATRIX_ROW_INS2:%.*]] = insertelement <3 x i32> [[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
// CHECK-NEXT: [[TMP5:%.*]] = add i32 4, [[TMP1]]
// CHECK-NEXT: [[MATRIX_ELEM3:%.*]] = extractelement <6 x i32> [[TMP2]], i32 [[TMP5]]
// CHECK-NEXT: [[MATRIX_ROW_INS4:%.*]] = insertelement <3 x i32> [[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne <3 x i32> [[MATRIX_ROW_INS4]], zeroinitializer
// CHECK-NEXT: ret <3 x i1> [[LOADEDV]]
//
// Single-subscript read of a bool2x3 with a runtime index, yielding a bool3.
// In the expected IR above the i1 matrix argument is zero-extended to i32
// elements before being stored; the read gathers the i32 elements at flat
// offsets index+0, index+2 and index+4, and the gathered vector is converted
// back to i1 lanes by comparing it not-equal to zero.
bool3 getBoolVecMatrixDynamic(bool2x3 M, int index) {
return M[index];
}
// CHECK-LABEL: define hidden noundef <4 x i1> @_Z24getBoolVecMatrixConstantu11matrix_typeILm4ELm4EbE(
// CHECK-SAME: <16 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i1> [[M]] to <16 x i32>
// CHECK-NEXT: store <16 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <16 x i32> [[TMP1]], i32 0
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <4 x i32> poison, i32 [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[MATRIX_ELEM1:%.*]] = extractelement <16 x i32> [[TMP1]], i32 4
// CHECK-NEXT: [[MATRIX_ROW_INS2:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
// CHECK-NEXT: [[MATRIX_ELEM3:%.*]] = extractelement <16 x i32> [[TMP1]], i32 8
// CHECK-NEXT: [[MATRIX_ROW_INS4:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
// CHECK-NEXT: [[MATRIX_ELEM5:%.*]] = extractelement <16 x i32> [[TMP1]], i32 12
// CHECK-NEXT: [[MATRIX_ROW_INS6:%.*]] = insertelement <4 x i32> [[MATRIX_ROW_INS4]], i32 [[MATRIX_ELEM5]], i32 3
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne <4 x i32> [[MATRIX_ROW_INS6]], zeroinitializer
// CHECK-NEXT: ret <4 x i1> [[LOADEDV]]
//
// Single-subscript read of a bool4x4 with the constant index 0, yielding a
// bool4. The expected IR above gathers the i32 elements at constant offsets
// 0, 4, 8 and 12 from the zero-extended in-memory matrix and converts the
// result to i1 lanes by comparing it not-equal to zero.
bool4 getBoolVecMatrixConstant(bool4x4 M) {
return M[0];
}
// CHECK-LABEL: define hidden noundef i1 @_Z27getBoolScalarMatrixConstantu11matrix_typeILm3ELm1EbE(
// CHECK-SAME: <3 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <3 x i32>], align 4
// CHECK-NEXT: [[TMP0:%.*]] = zext <3 x i1> [[M]] to <3 x i32>
// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <3 x i32> [[TMP1]], i32 1
// CHECK-NEXT: [[MATRIX_ROW_INS:%.*]] = insertelement <1 x i32> poison, i32 [[MATRIX_ELEM]], i32 0
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne <1 x i32> [[MATRIX_ROW_INS]], zeroinitializer
// CHECK-NEXT: [[CAST_VTRUNC:%.*]] = extractelement <1 x i1> [[LOADEDV]], i32 0
// CHECK-NEXT: ret i1 [[CAST_VTRUNC]]
//
// Single-subscript read of a bool3x1 with the constant index 1, returned as a
// scalar bool. The expected IR above extracts i32 element 1 of the
// zero-extended in-memory matrix into a one-element vector, compares that
// vector not-equal to zero to obtain an i1 lane, and extracts lane 0 as the
// scalar return value.
bool getBoolScalarMatrixConstant(bool3x1 M) {
return M[1];
}