Files
llvm-project/clang/test/CodeGen/link-builtin-bitcode.c
Shilei Tian f3a674a2ef [RFC][Clang][AMDGPU] Emit only delta target-features to reduce IR bloat (#176533)
Currently, AMDGPU functions have `target-features` attribute populated
with all default features for the target GPU. This is redundant because
the backend can derive these defaults from the `target-cpu` attribute
via `AMDGPUTargetMachine::getFeatureString()`.

In this PR, for AMDGPU targets only:

- Functions without explicit target attributes no longer emit
`target-features`
- Functions with `__attribute__((target(...)))` or `-target-feature`
emit only features that differ from the target's defaults (delta)

The backend already handles missing `target-features` correctly by
falling back to the TargetMachine's defaults.

A new cc1 flag `-famdgpu-emit-full-target-features` is added to emit
full features when needed.

Example:

Before:

```llvm
attributes #0 = { "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,..." }
```

After (default):

```llvm
attributes #0 = { "target-cpu"="gfx90a" }
```

After (with explicit `+wavefrontsize32` override):

```llvm
attributes #0 = { "target-cpu"="gfx90a" "target-features"="+wavefrontsize32" }
```
2026-01-20 14:49:35 -05:00

50 lines
3.4 KiB
C

// Build two versions of the bitcode library, one with a target-cpu set and one without
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
// RUN: %clang_cc1 -triple amdgcn-- -DBITCODE -emit-llvm-bc -o %t-lib.no-cpu.bc %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
// RUN: -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
// RUN: -mlink-builtin-bitcode %t-lib.no-cpu.bc -o - %t.bc | FileCheck %s
// Library half of the test: compiled only when -DBITCODE is passed, which the
// first two compiler invocations at the top of this file do to produce the
// bitcode libraries that are later linked in via -mlink-builtin-bitcode.
#ifdef BITCODE
// No target attribute at all.
int no_attr(void) { return 42; }
// Requests a feature that also appears in the expected gfx90a feature list
// at the bottom of this file.
int __attribute__((target("gfx8-insts"))) attr_in_target(void) { return 42; }
// Requests a feature that is absent from the other expected feature lists;
// the expected output for this function additionally carries
// +extended-image-insts.
int __attribute__((target("extended-image-insts"))) attr_not_in_target(void) { return 42; }
// Disables a feature: the expected output for this function carries
// -gfx9-insts and no +gfx9-insts.
int __attribute__((target("no-gfx9-insts"))) attr_incompatible(void) { return 42; }
// Global that the expected output shows as internal with initializer 12.
int x = 12;
#endif
// Declarations visible in both compilation modes. Without -DBITCODE these are
// the only declarations of the library symbols in this translation unit.
extern int no_attr(void);
extern int attr_in_target(void);
extern int attr_not_in_target(void);
extern int attr_incompatible(void);
extern int x;
// References every library function and the global x.
// Uses an explicit (void) prototype, matching the declarations above, instead
// of an obsolescent non-prototype definition.
int bar(void) { return no_attr() + attr_in_target() + attr_not_in_target() + attr_incompatible() + x; }
// CHECK: @x = internal addrspace(1) global i32 12, align 4
// CHECK-LABEL: define dso_local i32 @bar
// CHECK-SAME: () #[[ATTR_BAR:[0-9]+]] {
//
// CHECK-LABEL: define internal i32 @no_attr
// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
// CHECK-LABEL: define internal i32 @attr_in_target
// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
// CHECK-LABEL: define internal i32 @attr_not_in_target
// CHECK-SAME: () #[[ATTR_EXTEND:[0-9]+]] {
// CHECK-LABEL: @attr_incompatible
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" }
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }