The main follow-up item to https://github.com/llvm/llvm-project/pull/160790 was to change -O0 codegen to convert in-memory i8 bool values to i1 with the `nonzero` rule (`icmp ne i8 %val, 0`) rather than the `truncate` rule (`trunc i8 %val to i1`). Bool values can only be `true` or `false`. While they are notionally a single bit, the smallest addressable unit is CHAR_BIT bits wide, and CHAR_BIT is typically 8. Programming errors (such as memcpying a random byte to a `bool`) can cause the 8-bit storage for a `bool` value to have a bit pattern that is different from `true` or `false`, which then leads to undefined behavior. Clang has historically taken advantage of this in optimized builds (everything other than -O0) by attaching range metadata to `bool` loads to assume that the value loaded can only be 0 or 1. This leads to exploitable security issues, and the correct behavior is not always easy to explain to C developers. To remedy this situation, Clang accepted a [-fstrict-bool](https://discourse.llvm.org/t/defining-what-happens-when-a-bool-isn-t-0-or-1/86778) switch to control whether it can assume that loaded bool values are always necessarily 0 or 1. By default, it does (maintaining the status quo), and users must specify `-fno-strict-bool` to opt out of that behavior. When opting out, users can optionally request that i8 bool values are converted to i1 either by truncation or by comparing to 0. The default is comparing to 0. However, since `-O0` alone _technically_ uses -fstrict-bool, unoptimized builds convert i8 bool values to i1 with a `trunc` operation, whereas `-O1 -fno-strict-bool` converts i8 bool values to i1 with `icmp ne 0`. This is a surprising inconsistency. This PR changes -O0 codegen to align with -fno-strict-bool.
This is achieved with a single-line change:

```diff
 bool isConvertingBoolWithCmp0() const {
   switch (getLoadBoolFromMem()) {
   case BoolFromMem::Strict:
+    return !isOptimizedBuild();
   case BoolFromMem::Truncate:
```

However, it impacts a _very large_ number of tests, so we agreed to move it out of the -fstrict-bool PR to reduce the chances that we would have to back out the whole thing for this secondary item. This PR makes the change and modifies the tests accordingly. I expect that it will go stale rather quickly. If this needs more discussion, I'll only update it once we reach consensus.
160 lines
7.2 KiB
HLSL
160 lines
7.2 KiB
HLSL
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
|
|
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
|
|
|
|
|
|
// Aggregate that pairs a 2x2 boolean matrix with a float. fn3 and fn6 use it
// to exercise matrix element access through a struct member.
struct S {
|
|
// 2x2 boolean matrix member; it is the first field of the struct.
bool2x2 bM;
|
|
// Scalar float member declared after the matrix.
float f;
|
|
};
|
|
|
|
// CHECK-LABEL: define hidden noundef i1 @_Z3fn1v(
|
|
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
|
|
// CHECK-NEXT: [[B:%.*]] = alloca [2 x <2 x i32>], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[B]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[B]], align 4
|
|
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
|
|
// CHECK-NEXT: store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret i1 [[TMP1]]
|
|
//
|
|
// Reads a single element from a local boolean matrix.
// Returns element [0][0] of a 2x2 matrix whose four elements are all true.
// The FileCheck assertions preceding this function expect the read to be an
// extractelement at index 0 of the loaded <4 x i32> value.
bool fn1() {
|
|
// All four elements are initialized to true.
bool2x2 B = {true,true,true,true};
|
|
return B[0][0];
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef <4 x i1> @_Z3fn2b(
|
|
// CHECK-SAME: i1 noundef [[V:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x i1>, align 4
|
|
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[A:%.*]] = alloca [2 x <2 x i32>], align 4
|
|
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[V]] to i32
|
|
// CHECK-NEXT: store i32 [[STOREDV]], ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
|
|
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i1> poison, i1 [[LOADEDV]], i32 0
|
|
// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x i1> [[VECINIT]], i1 true, i32 2
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[LOADEDV2:%.*]] = icmp ne i32 [[TMP1]], 0
|
|
// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i1> [[VECINIT1]], i1 [[LOADEDV2]], i32 1
|
|
// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <4 x i1> [[VECINIT3]], i1 false, i32 3
|
|
// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[VECINIT4]] to <4 x i32>
|
|
// CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[A]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[A]], align 4
|
|
// CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i1>, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret <4 x i1> [[TMP4]]
|
|
//
|
|
// Builds a 2x2 boolean matrix from a mix of a runtime value and constants and
// returns it.
//   V: runtime boolean used for the first and third initializer elements.
// The FileCheck assertions preceding this function expect the four
// initializers to be inserted at vector indices 0, 2, 1 and 3, in that order,
// before the result is zero-extended to <4 x i32> for storage.
bool2x2 fn2(bool V) {
|
|
// Initializer list order: V, true, V, false.
bool2x2 A = {V, true, V, false};
|
|
return A;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef i1 @_Z3fn3v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
|
|
// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
|
|
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z3fn3v.s, i32 20, i1 false)
|
|
// CHECK-NEXT: [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BM]], align 1
|
|
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
|
|
// CHECK-NEXT: store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret i1 [[TMP1]]
|
|
//
|
|
// Reads a matrix element through a struct member.
// Initializes an S with bM = {true,true,false,false} and f = 1.0, then returns
// element [0][0] of s.bM. The FileCheck assertions preceding this function
// expect the struct to be initialized by a memcpy from a constant and the
// element to be read with an extractelement at index 0.
bool fn3() {
|
|
S s = {{true,true,false,false}, 1.0};
|
|
return s.bM[0][0];
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef i1 @_Z3fn4v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
|
|
// CHECK-NEXT: [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
|
|
// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[ARR]], i32 1
|
|
// CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
|
|
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
|
|
// CHECK-NEXT: store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret i1 [[TMP1]]
|
|
//
|
|
// Reads a matrix element from an array of matrices.
// Arr[0] is all true and Arr[1] is all false; the function returns element
// [1][0] of Arr[0]. The FileCheck assertions preceding this function expect
// that access to become an extractelement at index 1 of the loaded
// <4 x i32> value.
bool fn4() {
|
|
bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
|
|
return Arr[0][1][0];
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z3fn5v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M:%.*]] = alloca [2 x <2 x i32>], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[M]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[M]], i32 0, i32 3
|
|
// CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Writes a constant to a single element of a local boolean matrix.
// The matrix starts with all elements true, then element [1][1] is set to
// false. The FileCheck assertions preceding this function expect the write to
// be a store of i32 0 through a getelementptr at element index 3.
void fn5() {
|
|
bool2x2 M = {true,true,true,true};
|
|
M[1][1] = false;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z3fn6v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
|
|
// CHECK-NEXT: store i32 0, ptr [[V]], align 4
|
|
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z3fn6v.s, i32 20, i1 false)
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V]], align 4
|
|
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
|
|
// CHECK-NEXT: [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[LOADEDV]] to i32
|
|
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <4 x i32>, ptr [[BM]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Writes a runtime boolean to a matrix element that lives inside a struct.
// V is a local bool initialized to false; it is assigned to element [1][0] of
// s.bM. The FileCheck assertions preceding this function expect V to be
// loaded as i32, converted to i1 with `icmp ne ... 0`, zero-extended back to
// i32, and stored through a getelementptr at element index 1.
void fn6() {
|
|
bool V = false;
|
|
S s = {{true,true,false,false}, 1.0};
|
|
s.bM[1][0] = V;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z3fn7v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
|
|
// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[ARR]], i32 1
|
|
// CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[ARRAYIDX]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Writes a constant to a matrix element inside an array of matrices.
// Arr[0] is all true and Arr[1] is all false; element [1][0] of Arr[0] is then
// set to false. The FileCheck assertions preceding this function expect the
// write to be a store of i32 0 through a getelementptr at element index 1 of
// the first array entry.
void fn7() {
|
|
bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
|
|
Arr[0][1][0] = false;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef <16 x i1> @_Z3fn8u11matrix_typeILm4ELm4EbE(
|
|
// CHECK-SAME: <16 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i1>, align 4
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i1> [[M]] to <16 x i32>
|
|
// CHECK-NEXT: store <16 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i1>, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret <16 x i1> [[TMP2]]
|
|
//
|
|
// Passes a 4x4 boolean matrix through unchanged.
//   m: matrix parameter that is returned as-is.
// The FileCheck assertions preceding this function expect the <16 x i1>
// argument to be zero-extended to <16 x i32> when stored to its local slot,
// then reloaded and stored to the return slot.
bool4x4 fn8(bool4x4 m) {
|
|
return m;
|
|
}
|