Summary: This PR simply changes the behavior of the `wchar_size` flag. Currently, we emit this in all cases for all targets. This causes problems during LLVM-IR linking, specifically because this would vary between Linux and Windows in unintuitive ways. Now we have an llvm::Triple helper to determine the size from the known values. The module flag will only be emitted if these do not match (indicating a non-standard environment). In addition to fixing AMDGCN bitcode linking, this also means we don't need to bloat *every* IR module compiled by clang with this flag. The changed tests reflect this: there is one less unnecessary piece of metadata.
68 lines
2.5 KiB
C++
68 lines
2.5 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
|
|
// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s
|
|
|
|
// Globals declared with the OpenCL-named address-space attributes. Per the
// autogenerated global checks further down in this file, they are expected to
// be emitted as: a -> addrspace(1), b -> no addrspace qualifier,
// c -> addrspace(4) constant, d -> addrspace(3), e -> addrspace(5).
int [[clang::opencl_global]] a = 100;
|
|
int [[clang::opencl_generic]] b = 42;
|
|
int [[clang::opencl_constant]] c = 999;
|
|
// d and e have no initializer and are marked loader_uninitialized; the global
// checks expect both to be emitted with an undef initializer.
[[clang::loader_uninitialized]] int [[clang::opencl_local]] d;
|
|
[[clang::loader_uninitialized]] int [[clang::opencl_private]] e;
|
|
|
|
// Globals declared with numeric [[clang::address_space(N)]] attributes, using
// N = 1, 0, 4, 3, 5. The global checks in this file expect a matching
// addrspace(N) qualifier on each, with address space 0 printed without one.
// Note that z is expected as a plain `global` in addrspace(4), whereas the
// opencl_constant variable c is expected as `constant` in the same space.
int [[clang::address_space(1)]] x = 100;
|
|
int [[clang::address_space(0)]] y = 42;
|
|
int [[clang::address_space(4)]] z = 999;
|
|
// w and u have no initializer and are marked loader_uninitialized; the global
// checks expect both to be emitted with an undef initializer.
[[clang::loader_uninitialized]] int [[clang::address_space(3)]] w;
|
|
[[clang::loader_uninitialized]] int [[clang::address_space(5)]] u;
|
|
|
|
// Numeric address spaces that none of the OpenCL-named globals in this file
// map to. The global checks expect the number to appear unchanged in the IR
// (addrspace(6) and addrspace(999)).
int [[clang::address_space(6)]] aaa = 1000;
|
|
int [[clang::address_space(999)]] bbb = 1234;
|
|
|
|
//.
|
|
// CHECK: @a = addrspace(1) global i32 100, align 4
|
|
// CHECK: @b = global i32 42, align 4
|
|
// CHECK: @c = addrspace(4) constant i32 999, align 4
|
|
// CHECK: @d = addrspace(3) global i32 undef, align 4
|
|
// CHECK: @e = addrspace(5) global i32 undef, align 4
|
|
// CHECK: @x = addrspace(1) global i32 100, align 4
|
|
// CHECK: @y = global i32 42, align 4
|
|
// CHECK: @z = addrspace(4) global i32 999, align 4
|
|
// CHECK: @w = addrspace(3) global i32 undef, align 4
|
|
// CHECK: @u = addrspace(5) global i32 undef, align 4
|
|
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
|
|
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
|
|
//.
|
|
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
|
|
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(1) @a, align 4
|
|
// CHECK-NEXT: store i32 0, ptr @b, align 4
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(3) @d, align 4
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(5) @e, align 4
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(1) @x, align 4
|
|
// CHECK-NEXT: store i32 0, ptr @y, align 4
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(3) @d, align 4
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(5) @u, align 4
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(6) @aaa, align 4
|
|
// CHECK-NEXT: store i32 0, ptr addrspace(999) @bbb, align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
extern "C" [[clang::amdgpu_kernel]] void foo() {
|
|
a = 0;
|
|
b = 0;
|
|
d = 0;
|
|
e = 0;
|
|
|
|
x = 0;
|
|
y = 0;
|
|
d = 0;
|
|
u = 0;
|
|
|
|
aaa = 0;
|
|
bbb = 0;
|
|
}
|
|
//.
|
|
// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
|
//.
|
|
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
|
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
|
//.
|