Files
llvm-project/clang/test/CodeGen/regcall4.c
Henry Baba-Weiss 6adef02db5 [X86][regcall] Rework struct classification for non-Windows x86-64 targets (#187134)
Currently, when `X86_64ABIInfo::classifyRegCallStructTypeImpl`
classifies a struct argument or return value as direct, it leaves the
LLVM IR coerce type unspecified, implicitly relying on
`CodeGenTypes::ConvertType` to eventually construct a default IR type
based on the struct's layout. This conversion is neither stable nor
guaranteed to adhere to the ABI's classification rules.

Instead, rewrite `classifyRegCallStructTypeImpl` to construct an
explicit sequence of coerce types, using the existing field
classification to obtain a coerce type for each member of the struct.
Also, rename the function to `passRegCallStructTypeDirectly` and return
a boolean instead, so that now `classifyRegCallStructType` is the only
place that computes `ABIArgInfo`.

This rewrite also fixes several other issues with the `X86_64ABIInfo`
implementation of `__regcall`:

* Empty structs are now ignored instead of being misclassified as
direct.
* Arrays are now classified specially based on the element type, since
`X86_64ABIInfo::classifyArgumentType` ignores standalone array types.
* SSE registers used for return values are now correctly reused for
arguments, matching the 64-bit Windows behavior.

Since this is an ABI change, it has the potential to cause
incompatibilities with `__regcall` code compiled by earlier versions of
Clang. Specifically:

* Because SSE return registers can now be reused as argument registers,
functions will now pass more floating point arguments in SSE registers.
* `_Complex float` struct fields are now passed in one SSE register
instead of two.

Fixes #62999
Fixes #98635
2026-04-13 13:03:39 -07:00

159 lines
12 KiB
C

// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefixes=X86,Win32,Windows
// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefixes=X64,Win64,Windows
// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-linux-gnu | FileCheck %s --check-prefixes=X86,Lin32,Linux
// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefixes=X64,Lin64,Linux
#include <xmmintrin.h>
// Two int arguments, using the __regcall keyword spelling. The 32-bit targets
// are expected to mark both parameters inreg; the 64-bit targets are not.
void __regcall v1(int a, int b) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v1(i32 inreg noundef %a, i32 inreg noundef %b)
// X64: define dso_local x86_regcallcc void @__regcall4__v1(i32 noundef %a, i32 noundef %b)
// Same signature as v1, but spelled with __attribute__((regcall)). The
// expected parameter attributes are the same as for the keyword spelling.
void __attribute__((regcall)) v1b(int a, int b) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v1b(i32 inreg noundef %a, i32 inreg noundef %b)
// X64: define dso_local x86_regcallcc void @__regcall4__v1b(i32 noundef %a, i32 noundef %b)
// char arguments. The 32-bit targets and 64-bit Linux are expected to add
// signext; 64-bit Windows is expected to emit no extension attribute.
void __regcall v2(char a, char b) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v2(i8 inreg noundef signext %a, i8 inreg noundef signext %b)
// Win64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef %a, i8 noundef %b)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef signext %a, i8 noundef signext %b)
// A struct holding a single int, passed between two plain ints.
// Expected lowering:
//   - 32-bit Windows: the struct is expanded to one i32 without inreg.
//   - 32-bit Linux: an extra unnamed inreg i32 precedes the expanded i32.
//   - 64-bit targets: the struct is coerced to a single i32.
struct Small { int x; };
void __regcall v3(int a, struct Small b, int c) {}
// Win32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 %b.0, i32 inreg noundef %c)
// Lin32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 inreg %0, i32 %b.0, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__v3(i32 noundef %a, i32 %b.coerce, i32 noundef %c)
// A struct wrapping a five-element int array, passed between two ints.
// Expected lowering:
//   - 32-bit targets: byval pointer (the trailing int keeps inreg on Windows
//     but not on Linux).
//   - 64-bit Windows: indirect pointer marked dead_on_return.
//   - 64-bit Linux: passed directly, coerced to [5 x i32].
struct Large { int a[5]; };
void __regcall v4(int a, struct Large b, int c) {}
// Win32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 inreg noundef %c)
// Lin32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 noundef %c)
// Win64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, ptr noundef dead_on_return %b, i32 noundef %c)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, [5 x i32] %b.coerce, i32 noundef %c)
// A long long followed by two ints. On the 32-bit targets the i64 is expected
// without inreg while the two i32 parameters still carry it.
void __regcall v5(long long a, int b, int c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 inreg noundef %b, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 noundef %b, i32 noundef %c)
// An empty struct argument followed by an int.
// Expected lowering:
//   - Both Linux targets: the empty struct is dropped from the signature.
//   - 32-bit Windows: passed as a byval pointer.
//   - 64-bit Windows: coerced to an i32.
struct Empty {};
void __regcall v6(struct Empty a, int b) {}
// Win32: define dso_local x86_regcallcc void @__regcall4__v6(ptr noundef byval(%struct.Empty) align 4 %a, i32 inreg noundef %b)
// Lin32: define dso_local x86_regcallcc void @__regcall4__v6(i32 inreg noundef %b)
// Win64: define dso_local x86_regcallcc void @__regcall4__v6(i32 %a.coerce, i32 noundef %b)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v6(i32 noundef %b)
// A struct mixing an int and a double. Both Windows targets are expected to
// pass it in memory (byval on 32-bit, dead_on_return pointer on 64-bit); both
// Linux targets are expected to split it into an i32 and a double.
struct IntDouble { int x; double y; };
void __regcall v7(struct IntDouble a) {}
// Win32: define dso_local x86_regcallcc void @__regcall4__v7(ptr noundef byval(%struct.IntDouble) align 4 %0)
// Win64: define dso_local x86_regcallcc void @__regcall4__v7(ptr noundef dead_on_return %a)
// Linux: define dso_local x86_regcallcc void @__regcall4__v7(i32 %{{.*}}, double %{{.*}})
// A struct that nests IntDouble and adds another double. The 32-bit targets
// expect byval, 64-bit Windows expects an indirect pointer, and 64-bit Linux
// expects the nesting flattened into three parameters: i32, double, double.
struct Nested { struct IntDouble a; double b; };
void __regcall v8(struct Nested a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v8(ptr noundef byval(%struct.Nested) align 4 %{{.*}})
// Win64: define dso_local x86_regcallcc void @__regcall4__v8(ptr noundef dead_on_return %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v8(i32 %a.coerce0, double %a.coerce1, double %a.coerce2)
// A struct with an empty-struct member followed by an int. The 32-bit targets
// expect byval; 64-bit Windows expects an i64 coercion, while 64-bit Linux
// expects only an i32 (no parameter is emitted for the empty member).
struct NestedEmpty { struct Empty e; int x; };
void __regcall v9(struct NestedEmpty a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v9(ptr noundef byval(%struct.NestedEmpty) align 4 %a)
// Win64: define dso_local x86_regcallcc void @__regcall4__v9(i64 %a.coerce)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v9(i32 %a.coerce)
// A struct of two long doubles. Both Windows targets expect two double
// parameters, 32-bit Linux expects byval, and 64-bit Linux expects two
// x86_fp80 parameters.
struct LongDouble { long double x, y; };
void __regcall v10(struct LongDouble a) {}
// Windows: define dso_local x86_regcallcc void @__regcall4__v10(double %a.0, double %a.1)
// Lin32: define dso_local x86_regcallcc void @__regcall4__v10(ptr noundef byval(%struct.LongDouble) align 4 %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v10(x86_fp80 %a.coerce0, x86_fp80 %a.coerce1)
// Ensure that we correctly count the number of registers used by an array of
// elements that each consume more than one register.
// Here the array holds five _Complex long long elements. The 32-bit targets
// expect byval, 64-bit Windows expects an indirect pointer, and 64-bit Linux
// expects a direct [5 x { i64, i64 }] coercion.
struct MultiIntDirect { _Complex long long a[5]; };
void __regcall v11(struct MultiIntDirect a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v11(ptr noundef byval(%struct.MultiIntDirect) align 4 %{{.*}})
// Win64: define dso_local x86_regcallcc void @__regcall4__v11(ptr noundef dead_on_return %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v11([5 x { i64, i64 }] %a.coerce)
// An array of nine _Complex double elements. Every target is expected to pass
// it in memory: byval on the 32-bit targets (align 4) and on 64-bit Linux
// (align 8), and as a dead_on_return pointer on 64-bit Windows.
// NOTE(review): the struct name suggests this is meant to exceed the available
// SSE registers - confirm against the ABI implementation.
struct MultiSSESpill { _Complex double a[9]; };
void __regcall v12(struct MultiSSESpill a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v12(ptr noundef byval(%struct.MultiSSESpill) align 4 %{{.*}})
// Win64: define dso_local x86_regcallcc void @__regcall4__v12(ptr noundef dead_on_return %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v12(ptr noundef byval(%struct.MultiSSESpill) align 8 %a)
// A _Complex float member followed by an int. The 32-bit targets expect the
// struct expanded to float, float, i32; 64-bit Windows expects an indirect
// pointer; 64-bit Linux expects the complex value as one <2 x float> plus an
// i32.
struct ComplexFloat { _Complex float a; int b; };
void __regcall v13(struct ComplexFloat a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v13(float %a.0, float %a.1, i32 %a.2)
// Win64: define dso_local x86_regcallcc void @__regcall4__v13(ptr noundef dead_on_return %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v13(<2 x float> %a.coerce0, i32 %a.coerce1)
// A struct ending in a flexible array member. The 32-bit targets and 64-bit
// Linux expect byval (align 4); 64-bit Windows expects an indirect pointer.
struct FlexibleArrayMember { int a; int b[]; };
void __regcall v14(struct FlexibleArrayMember a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v14(ptr noundef byval(%struct.FlexibleArrayMember) align 4 %a)
// Win64: define dso_local x86_regcallcc void @__regcall4__v14(ptr noundef dead_on_return %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v14(ptr noundef byval(%struct.FlexibleArrayMember) align 4 %a)
// Structs made of two, four and five doubles, shared by the hfa* test cases
// that follow.
struct HFA2 { double x, y; };
struct HFA4 { double w, x, y, z; };
struct HFA5 { double v, w, x, y, z; };
// A four-double struct between two ints: every target is expected to pass the
// struct as four separate double parameters.
void __regcall hfa1(int a, struct HFA4 b, int c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 inreg noundef %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 noundef %a, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, i32 noundef %c)
// HFAs that would require more than six total SSE registers are passed
// indirectly. Additional vector arguments can consume the rest of the SSE
// registers.
// In this case both four-double structs are expected to be expanded into
// eight doubles on every target. The trailing double is then expected as an
// inreg pointer on the 32-bit targets and as a direct double on the 64-bit
// targets.
void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, ptr inreg noundef dead_on_return %0)
// X64: define dso_local x86_regcallcc void @__regcall4__hfa2(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double noundef %c)
// Ensure that we pass builtin types directly while counting them against the
// SSE register usage.
// Five scalar doubles are followed by a two-double struct; every target is
// expected to emit seven double parameters in total.
void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %f.0, double %f.1)
// X64: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %{{.*}}, double %{{.*}})
// Aggregates with more than four elements are not HFAs and are passed byval.
// Because they are not classified as homogeneous, they don't get special
// handling to ensure alignment.
// A five-double struct. The 32-bit expectation below uses the prefix shared by
// both i386 RUN lines so that FileCheck actually evaluates it; the 64-bit
// targets expect an indirect pointer (Windows) or five direct doubles (Linux).
void __regcall hfa4(struct HFA5 a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef byval(%struct.HFA5) align 4 %{{.*}})
// Win64: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef dead_on_return %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4)
// Return HFAs of 4 or fewer elements in registers.
// The function returns a file-scope two-double struct by value. The 32-bit
// targets and 64-bit Windows expect the %struct.HFA2 return type; 64-bit Linux
// expects the literal { double, double } type.
static struct HFA2 g_hfa2;
struct HFA2 __regcall hfa5(void) { return g_hfa2; }
// X86: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5()
// Win64: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5()
// Lin64: define dso_local x86_regcallcc { double, double } @__regcall4__hfa5()
// Vector counterparts of the hfa* cases, built from a 16-byte float vector.
// hva1 passes a four-vector struct between two ints; every target is expected
// to expand it into four <4 x float> parameters.
typedef float __attribute__((vector_size(16))) v4f32;
struct HVA2 { v4f32 x, y; };
struct HVA4 { v4f32 w, x, y, z; };
void __regcall hva1(int a, struct HVA4 b, int c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hva1(i32 inreg noundef %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__hva1(i32 noundef %a, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 noundef %c)
// Two four-vector structs followed by a standalone vector. Both structs are
// expected to be expanded on every target; the trailing vector is expected as
// an inreg pointer on the 32-bit targets and as a direct <4 x float> on the
// 64-bit targets.
void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef dead_on_return %0)
// X64: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> noundef %c)
// Five standalone vectors followed by a two-vector struct; every target is
// expected to emit seven <4 x float> parameters in total.
void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, <4 x float> %f.0, <4 x float> %f.1)
// X64: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, <4 x float> %{{.*}}, <4 x float> %{{.*}})
// A struct of two three-element float vectors; every target is expected to
// pass it as two <3 x float> parameters.
typedef float __attribute__((ext_vector_type(3))) v3f32;
struct OddSizeHVA { v3f32 x, y; };
void __regcall odd_size_hva(struct OddSizeHVA a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
// X64: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %{{.*}}, <3 x float> %{{.*}})
// A struct wrapping an array of four __m128 values, both returned and passed
// four times.
// Expected lowering:
//   - 32-bit targets: the first two arguments are expanded into vectors and the
//     last two are passed as inreg pointers.
//   - 64-bit Windows: all four arguments are expanded (sixteen vectors).
//   - 64-bit Linux: each argument is coerced to [4 x <4 x float>] and the
//     return type is { [4 x <4 x float>] }.
// NOTE(review): h is returned without being initialized; only the emitted
// signature is checked here.
struct HFA6 { __m128 f[4]; };
struct HFA6 __regcall ret_reg_reused(struct HFA6 a, struct HFA6 b, struct HFA6 c, struct HFA6 d){ struct HFA6 h; return h;}
// X86: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef dead_on_return %c, ptr inreg noundef dead_on_return %d)
// Win64: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3)
// Lin64: define dso_local x86_regcallcc { [4 x <4 x float>] } @__regcall4__ret_reg_reused([4 x <4 x float>] %a.coerce, [4 x <4 x float>] %b.coerce, [4 x <4 x float>] %c.coerce, [4 x <4 x float>] %d.coerce)