Files
apple-fcloutier d28a8b0b3f [CodeGen] Change -O0 bool load codegen to have nonzero model (#193783)
The main follow-up item to
https://github.com/llvm/llvm-project/pull/160790 was changing -O0
codegen to convert in-memory i8 bool values to i1 with the `nonzero`
rule (`icmp ne i8 %val, 0`) rather than the `truncate` rule (`trunc i8
%val to i1`).

Bool values can only be `true` or `false`. While they are notionally a
single bit, the smallest addressable unit is CHAR_BIT bits large, and
CHAR_BIT is typically 8. Programming errors (such as memcpying a random
byte to a `bool`) can cause the 8-bit storage for a `bool` value to have
a bit pattern that is different from `true` or `false`, which then leads
to undefined behavior.

Clang has historically taken advantage of this in optimized builds
(everything other than -O0) by attaching range metadata to `bool` loads
to assume that the value loaded can only be 0 or 1. This leads to
exploitable security issues, and the correct behavior is not always easy
to explain to C developers. To remedy this situation, Clang accepted a
[-fstrict-bool](https://discourse.llvm.org/t/defining-what-happens-when-a-bool-isn-t-0-or-1/86778)
switch to control whether it can assume that loaded bool values are
always 0 or 1. By default, it does (maintaining the status
quo), and users must specify `-fno-strict-bool` to opt out of that
behavior.

When opting out, users can optionally request that bool i8 values are
converted to i1 either by truncation or by comparing to 0. The default
is comparing to 0. However, since `-O0` alone _technically_ uses
-fstrict-bool, unoptimized builds convert i8 bool values to i1 with a
`trunc` operation, whereas `-O1 -fno-strict-bool` converts i8 bool
values to i1 with `icmp ne 0`. This is a surprising inconsistency.

This PR changes -O0 codegen to align with -fno-strict-bool. This is
achieved with a single-line change:

```
   bool isConvertingBoolWithCmp0() const {
     switch (getLoadBoolFromMem()) {
     case BoolFromMem::Strict:
+      return !isOptimizedBuild();
     case BoolFromMem::Truncate:
```

However, it impacts a _very large_ number of tests, so we agreed to move
it out of the -fstrict-bool PR to reduce the chances we would have to
back out the whole thing for this secondary item.

This PR does the change and modifies the tests accordingly. I expect
that it will go stale rather quickly. If this needs more discussion,
I'll only update it once we reach consensus.
2026-04-27 17:07:20 -04:00

193 lines
9.5 KiB
HLSL

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
// CHECK-LABEL: define hidden void @_Z13ConstantSplatv(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> splat (i32 1), ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize an int4x4 matrix from the integer literal 1.
// The expected IR for this function is a single store of a constant
// <16 x i32> splat of 1 into the matrix alloca.
void ConstantSplat() {
int4x4 M = 1;
}
// CHECK-LABEL: define hidden void @_Z18ConstantFloatSplatv(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M:%.*]] = alloca [2 x <2 x float>], align 4
// CHECK-NEXT: store <4 x float> splat (float 3.250000e+00), ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize a float2x2 matrix from the literal 3.25.
// The expected IR for this function is a single store of a constant
// <4 x float> splat of 3.25 into the matrix alloca.
void ConstantFloatSplat() {
float2x2 M = 3.25;
}
// CHECK-LABEL: define hidden void @_Z21ConstantTrueBoolSplatv(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M:%.*]] = alloca [3 x <3 x i32>], align 4
// CHECK-NEXT: store <9 x i32> splat (i32 1), ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize a bool3x3 matrix from the literal `true`.
// The expected IR allocates the matrix with i32 elements and stores a
// constant <9 x i32> splat of 1.
void ConstantTrueBoolSplat() {
bool3x3 M = true;
}
// CHECK-LABEL: define hidden void @_Z22ConstantFalseBoolSplatv(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M:%.*]] = alloca [3 x <3 x i32>], align 4
// CHECK-NEXT: store <9 x i32> zeroinitializer, ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize a bool3x3 matrix from the literal `false`.
// The expected IR allocates the matrix with i32 elements and stores a
// <9 x i32> zeroinitializer.
void ConstantFalseBoolSplat() {
bool3x3 M = false;
}
// CHECK-LABEL: define hidden void @_Z12DynamicSplatf(
// CHECK-SAME: float noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [3 x <3 x float>], align 4
// CHECK-NEXT: store float [[VALUE]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <9 x float> poison, float [[TMP0]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <9 x float> [[SPLAT_SPLATINSERT]], <9 x float> poison, <9 x i32> zeroinitializer
// CHECK-NEXT: store <9 x float> [[SPLAT_SPLAT]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize a float3x3 matrix from a runtime float parameter.
// The expected IR reloads the parameter, broadcasts it to <9 x float> with an
// insertelement + shufflevector pair, and stores the result.
void DynamicSplat(float Value) {
float3x3 M = Value;
}
// CHECK-LABEL: define hidden void @_Z16DynamicBoolSplatb(
// CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32
// CHECK-NEXT: store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[LOADEDV]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i1> [[SPLAT_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = zext <16 x i1> [[SPLAT_SPLAT]] to <16 x i32>
// CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize a bool4x4 matrix from a runtime bool parameter.
// The expected IR keeps the parameter in an i32 slot, converts the loaded
// value to i1 with `icmp ne ..., 0`, broadcasts it to <16 x i1>, and
// zero-extends to <16 x i32> before the store.
void DynamicBoolSplat(bool Value) {
bool4x4 M = Value;
}
// CHECK-LABEL: define hidden void @_Z13CastThenSplatDv4_f(
// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca <4 x float>, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [3 x <3 x float>], align 4
// CHECK-NEXT: store <4 x float> [[VALUE]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[CAST_VTRUNC:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <9 x float> poison, float [[CAST_VTRUNC]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <9 x float> [[SPLAT_SPLATINSERT]], <9 x float> poison, <9 x i32> zeroinitializer
// CHECK-NEXT: store <9 x float> [[SPLAT_SPLAT]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: explicitly cast a float4 to float, then use the scalar to
// initialize a float3x3 matrix. The expected IR extracts element 0 of the
// vector and broadcasts it to <9 x float>.
void CastThenSplat(float4 Value) {
float3x3 M = (float) Value;
}
// CHECK-LABEL: define hidden void @_Z30ExplicitIntToBoolCastThenSplatDv3_i(
// CHECK-SAME: <3 x i32> noundef [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca <3 x i32>, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [2 x <2 x i32>], align 4
// CHECK-NEXT: store <3 x i32> [[VALUE]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne <3 x i32> [[TMP0]], zeroinitializer
// CHECK-NEXT: [[CAST_VTRUNC:%.*]] = extractelement <3 x i1> [[TOBOOL]], i32 0
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[CAST_VTRUNC]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i1> [[SPLAT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[SPLAT_SPLAT]] to <4 x i32>
// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: explicitly cast an int3 to bool, then use the scalar to
// initialize a bool2x2 matrix. The expected IR compares the whole vector
// against zero (`icmp ne`), extracts element 0, broadcasts it to <4 x i1>, and
// zero-extends to <4 x i32> before the store.
void ExplicitIntToBoolCastThenSplat(int3 Value) {
bool2x2 M = (bool) Value;
}
// CHECK-LABEL: define hidden void @_Z32ExplicitFloatToBoolCastThenSplatDv2_f(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca <2 x float>, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: store <2 x float> [[VALUE]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <2 x float> [[TMP0]], zeroinitializer
// CHECK-NEXT: [[CAST_VTRUNC:%.*]] = extractelement <2 x i1> [[TOBOOL]], i32 0
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <6 x i1> poison, i1 [[CAST_VTRUNC]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <6 x i1> [[SPLAT_SPLATINSERT]], <6 x i1> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = zext <6 x i1> [[SPLAT_SPLAT]] to <6 x i32>
// CHECK-NEXT: store <6 x i32> [[TMP1]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: explicitly cast a float2 to bool, then use the scalar to
// initialize a bool2x3 matrix. The expected IR compares the whole vector
// against zero (`fcmp une`), extracts element 0, broadcasts it to <6 x i1>,
// and zero-extends to <6 x i32> before the store.
void ExplicitFloatToBoolCastThenSplat(float2 Value) {
bool2x3 M = (bool) Value;
}
// CHECK-LABEL: define hidden void @_Z32ExplicitBoolToFloatCastThenSplatb(
// CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [2 x <3 x float>], align 4
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32
// CHECK-NEXT: store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: [[CONV:%.*]] = uitofp i1 [[LOADEDV]] to float
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <6 x float> poison, float [[CONV]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <6 x float> [[SPLAT_SPLATINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: store <6 x float> [[SPLAT_SPLAT]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: explicitly cast a bool parameter to float, then use it to
// initialize a float3x2 matrix. The expected IR converts the loaded i32 to i1
// with `icmp ne ..., 0`, converts that to float with `uitofp`, and broadcasts
// the result to <6 x float>.
void ExplicitBoolToFloatCastThenSplat(bool Value) {
float3x2 M = (float) Value;
}
// CHECK-LABEL: define hidden void @_Z32ImplicitFloatToBoolCastThenSplatf(
// CHECK-SAME: float noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: store float [[VALUE]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une float [[TMP0]], 0.000000e+00
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <6 x i1> poison, i1 [[TOBOOL]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <6 x i1> [[SPLAT_SPLATINSERT]], <6 x i1> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = zext <6 x i1> [[SPLAT_SPLAT]] to <6 x i32>
// CHECK-NEXT: store <6 x i32> [[TMP1]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize a bool2x3 matrix directly from a float parameter,
// relying on an implicit float-to-bool conversion. The expected IR compares
// the scalar against 0.0 (`fcmp une`), broadcasts the i1 to <6 x i1>, and
// zero-extends to <6 x i32> before the store.
void ImplicitFloatToBoolCastThenSplat(float Value) {
bool2x3 M = Value;
}
// CHECK-LABEL: define hidden void @_Z32ImplicitBoolToFloatCastThenSplatb(
// CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[M:%.*]] = alloca [2 x <3 x float>], align 4
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32
// CHECK-NEXT: store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: [[CONV:%.*]] = uitofp i1 [[LOADEDV]] to float
// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <6 x float> poison, float [[CONV]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <6 x float> [[SPLAT_SPLATINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: store <6 x float> [[SPLAT_SPLAT]], ptr [[M]], align 4
// CHECK-NEXT: ret void
//
// Test case: initialize a float3x2 matrix directly from a bool parameter,
// relying on an implicit bool-to-float conversion. The expected IR converts
// the loaded i32 to i1 with `icmp ne ..., 0`, converts that to float with
// `uitofp`, and broadcasts the result to <6 x float>.
void ImplicitBoolToFloatCastThenSplat(bool Value) {
float3x2 M = Value;
}