[HIP][MacOS] Mach-O support and Darwin toolchain fixes (#183991)

This PR adds support for HIP on macOS: Mach-O section naming, Darwin
host toolchain initialization guards, and HIPSPV behavior when Darwin is
the host.

This has been verified using chipStar on MacOS via the PoCL OpenCL
implementation.

## Uninitialized target workaround
Darwin’s toolchain is only initialized when its own TranslateArgs runs.
For HIP/CUDA device jobs, Darwin is used as the HostTC and never gets
its args translated, so its target stays uninitialized. The new checks
avoid asserting on that uninitialized state. A better long-term fix is
to initialize Darwin earlier (see the FIXME in Driver.cpp
BuildJobsForAction).

- [ ] Initialize Darwin toolchain during construction instead of lazily
in TranslateArgs. See Driver.cpp BuildJobsForAction FIXME.

- [x] In Darwin’s addClangTargetOptions, skip host-stdlib flags when
DeviceOffloadKind != OFK_None so HIPSPV can safely delegate to the host.
This commit is contained in:
Paulius Velesko
2026-04-28 20:43:59 +03:00
committed by GitHub
parent dddd0da8e6
commit 264ac2d3af
11 changed files with 263 additions and 43 deletions

View File

@@ -817,10 +817,14 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
llvm::Constant *FatBinStr; llvm::Constant *FatBinStr;
unsigned FatMagic; unsigned FatMagic;
if (IsHIP) { if (IsHIP) {
FatbinConstantName = ".hip_fatbin"; // On macOS (Mach-O), section names must be in "segment,section" format.
FatbinSectionName = ".hipFatBinSegment"; FatbinConstantName =
CGM.getTriple().isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin";
FatbinSectionName =
CGM.getTriple().isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment";
ModuleIDSectionName = "__hip_module_id"; ModuleIDSectionName =
CGM.getTriple().isMacOSX() ? "__HIP,__module_id" : "__hip_module_id";
ModuleIDPrefix = "__hip_"; ModuleIDPrefix = "__hip_";
if (CudaGpuBinary) { if (CudaGpuBinary) {

View File

@@ -1140,6 +1140,50 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const {
Darwin::~Darwin() {} Darwin::~Darwin() {}
void Darwin::ensureTargetInitialized() const {
if (TargetInitialized)
return;
llvm::Triple::OSType OS = getTriple().getOS();
DarwinPlatformKind Platform;
switch (OS) {
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
Platform = MacOS;
break;
case llvm::Triple::IOS:
Platform = IPhoneOS;
break;
case llvm::Triple::TvOS:
Platform = TvOS;
break;
case llvm::Triple::WatchOS:
Platform = WatchOS;
break;
case llvm::Triple::XROS:
Platform = XROS;
break;
case llvm::Triple::DriverKit:
Platform = DriverKit;
break;
default:
// Unknown platform; leave uninitialized.
return;
}
DarwinEnvironmentKind Environment = NativeEnvironment;
if (getTriple().isSimulatorEnvironment())
Environment = Simulator;
else if (getTriple().isMacCatalystEnvironment())
Environment = MacCatalyst;
VersionTuple OsVer = getTriple().getOSVersion();
setTarget(Platform, Environment, OsVer.getMajor(),
OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
VersionTuple());
}
AppleMachO::~AppleMachO() {} AppleMachO::~AppleMachO() {}
MachO::~MachO() {} MachO::~MachO() {}
@@ -1182,7 +1226,11 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType)); llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
// If the target isn't initialized (e.g., an unknown Darwin platform, return // If the target isn't initialized (e.g., an unknown Darwin platform, return
// the default triple). // the default triple). Note: we intentionally do NOT call
// ensureTargetInitialized() here because this method is called before
// AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name),
// and lazy init with version 0.0.0 would conflict with the real version
// that AddDeploymentTarget() later sets via setTarget().
if (!isTargetInitialized()) if (!isTargetInitialized())
return Triple.getTriple(); return Triple.getTriple();
@@ -1248,6 +1296,11 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
CC1Args.push_back("-Werror=undef-prefix"); CC1Args.push_back("-Werror=undef-prefix");
// For modern targets, promote certain warnings to errors. // For modern targets, promote certain warnings to errors.
// Lazily initialize the target if needed (e.g. when Darwin is used as
// a host toolchain for device offloading).
ensureTargetInitialized();
if (!isTargetInitialized())
return;
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) { if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
// Always enable -Wdeprecated-objc-isa-usage and promote it // Always enable -Wdeprecated-objc-isa-usage and promote it
// to an error. // to an error.
@@ -3399,6 +3452,12 @@ void Darwin::addClangTargetOptions(
MachO::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind); MachO::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind);
// When compiling device code (e.g. SPIR-V for HIP), skip host-specific
// flags like -faligned-alloc-unavailable and -fno-sized-deallocation
// that depend on the host OS version and are irrelevant to device code.
if (DeviceOffloadKind != Action::OFK_None)
return;
// Pass "-faligned-alloc-unavailable" only when the user hasn't manually // Pass "-faligned-alloc-unavailable" only when the user hasn't manually
// enabled or disabled aligned allocations. // enabled or disabled aligned allocations.
if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation, if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation,
@@ -3939,6 +3998,9 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
} }
void Darwin::CheckObjCARC() const { void Darwin::CheckObjCARC() const {
ensureTargetInitialized();
if (!isTargetInitialized())
return;
if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() || if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() ||
(isTargetMacOSBased() && !isMacosxVersionLT(10, 6))) (isTargetMacOSBased() && !isMacosxVersionLT(10, 6)))
return; return;
@@ -3958,6 +4020,9 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
Res |= SanitizerKind::FuzzerNoLink; Res |= SanitizerKind::FuzzerNoLink;
Res |= SanitizerKind::ObjCCast; Res |= SanitizerKind::ObjCCast;
ensureTargetInitialized();
if (!isTargetInitialized())
return Res;
// Prior to 10.9, macOS shipped a version of the C++ standard library without // Prior to 10.9, macOS shipped a version of the C++ standard library without
// C++11 support. The same is true of iOS prior to version 5. These OS'es are // C++11 support. The same is true of iOS prior to version 5. These OS'es are
// incompatible with -fsanitize=vptr. // incompatible with -fsanitize=vptr.

View File

@@ -391,6 +391,12 @@ private:
void VerifyTripleForSDK(const llvm::opt::ArgList &Args, void VerifyTripleForSDK(const llvm::opt::ArgList &Args,
const llvm::Triple Triple) const; const llvm::Triple Triple) const;
protected:
/// Lazily initialize the target platform from the triple when
/// AddDeploymentTarget has not run yet (e.g. when Darwin is used as
/// a host toolchain for device offloading).
void ensureTargetInitialized() const;
public: public:
Darwin(const Driver &D, const llvm::Triple &Triple, Darwin(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args); const llvm::opt::ArgList &Args);

View File

@@ -409,9 +409,11 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
ObjStream << "# *** Automatically generated by Clang ***\n"; ObjStream << "# *** Automatically generated by Clang ***\n";
if (FoundPrimaryGpuBinHandleSymbol) { if (FoundPrimaryGpuBinHandleSymbol) {
// Define the first gpubin handle symbol // Define the first gpubin handle symbol
if (HostTriple.isWindowsMSVCEnvironment()) if (HostTriple.isWindowsMSVCEnvironment()) {
ObjStream << " .section .hip_gpubin_handle,\"dw\"\n"; ObjStream << " .section .hip_gpubin_handle,\"dw\"\n";
else { } else if (HostTriple.isMacOSX()) {
ObjStream << " .section __HIP,__gpubin_handle\n";
} else {
ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n"; ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n";
ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ",@object\n"; ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ",@object\n";
ObjStream << " .section .hip_gpubin_handle,\"aw\"\n"; ObjStream << " .section .hip_gpubin_handle,\"aw\"\n";
@@ -430,9 +432,12 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
} }
if (FoundPrimaryHipFatbinSymbol) { if (FoundPrimaryHipFatbinSymbol) {
// Define the first fatbin symbol // Define the first fatbin symbol
if (HostTriple.isWindowsMSVCEnvironment()) if (HostTriple.isWindowsMSVCEnvironment()) {
ObjStream << " .section .hip_fatbin,\"dw\"\n"; ObjStream << " .section .hip_fatbin,\"dw\"\n";
else { } else if (HostTriple.isMacOSX()) {
// Mach-O requires "segment,section" format
ObjStream << " .section __HIP,__hip_fatbin\n";
} else {
ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n"; ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n";
ObjStream << " .type " << PrimaryHipFatbinSymbol << ",@object\n"; ObjStream << " .type " << PrimaryHipFatbinSymbol << ",@object\n";
ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";

View File

@@ -0,0 +1,28 @@
// Verify that HIP fat binary sections use Mach-O "segment,section" format on Darwin.
// RUN: echo -n "GPU binary would be here." > %t
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip \
// RUN: | FileCheck %s --check-prefix=HIPEF
// RUN: %clang_cc1 -cuid=123 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
// RUN: -o - -x hip \
// RUN: | FileCheck %s --check-prefix=HIPNEF
#include "Inputs/cuda.h"
__device__ int device_var;
__constant__ int constant_var;
// When fat binary is embedded, section names use Mach-O format.
// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}section "__HIP,__hip_fatbin"{{.*}}align 4096
// HIPEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
// HIPEF-SAME: section "__HIP,__fatbin"
// When fat binary is external (no -fcuda-include-gpubinary), external symbol uses Mach-O section.
// HIPNEF: @[[FATBIN:__hip_fatbin_[0-9a-f]+]] = external constant i8, section "__HIP,__hip_fatbin"
// HIPNEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
// HIPNEF-SAME: section "__HIP,__fatbin"
__global__ void kernelfunc(int i, int j, int k) {}
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }

View File

@@ -11,6 +11,9 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \ // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ // RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP-COFF %s // RUN: --check-prefix=HIP-COFF %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-macosx10.15.0 -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP-MACHO %s
#include "Inputs/cuda.h" #include "Inputs/cuda.h"
@@ -75,6 +78,21 @@
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading" // HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE" // HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE"
//. //.
// HIP-MACHO: @managed.managed = global i32 0, align 4
// HIP-MACHO: @managed = externally_initialized global ptr null
// HIP-MACHO: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "__LLVM,offload_entries"
//.
// CUDA-LABEL: @_Z18__device_stub__foov( // CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry: // CUDA-NEXT: entry:
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) // CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
@@ -103,6 +121,13 @@
// HIP-COFF: setup.end: // HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void // HIP-COFF-NEXT: ret void
// //
// HIP-MACHO-LABEL: @_Z18__device_stub__foov(
// HIP-MACHO-NEXT: entry:
// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
// HIP-MACHO: setup.end:
// HIP-MACHO-NEXT: ret void
//
__global__ void foo() {} __global__ void foo() {}
__device__ int var = 1; __device__ int var = 1;
const __device__ int constant = 1; const __device__ int constant = 1;
@@ -137,6 +162,13 @@ __device__ __managed__ int managed = 0;
// HIP-COFF: setup.end: // HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void // HIP-COFF-NEXT: ret void
// //
// HIP-MACHO-LABEL: @_Z21__device_stub__kernelv(
// HIP-MACHO-NEXT: entry:
// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
// HIP-MACHO: setup.end:
// HIP-MACHO-NEXT: ret void
//
__global__ void kernel() { external = 1; } __global__ void kernel() { external = 1; }
surface<void> surf; surface<void> surf;

View File

@@ -162,6 +162,8 @@
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-apple-macosx10.15.0 \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-MACHO
// HIP-ELF: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // HIP-ELF: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-ELF-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // HIP-ELF-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
@@ -170,13 +172,25 @@
// HIP-COFF: @__start_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OA" // HIP-COFF: @__start_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OA"
// HIP-COFF-NEXT: @__stop_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OZ" // HIP-COFF-NEXT: @__stop_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OZ"
// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" // HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 // HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-NEXT: @.hip.binary_handle = internal global ptr null // HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
// HIP-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-ELF-NEXT: @.hip.binary_handle = internal global ptr null
// HIP-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-COFF-NEXT: @.hip.binary_handle = internal global ptr null
// HIP-MACHO: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section "__HIP,__hip_fatbin"
// HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
// HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] // HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { // HIP: define internal void @.hip.fatbin_reg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry: // HIP-NEXT: entry:
// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) // HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 // HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
@@ -185,20 +199,20 @@
// HIP-NEXT: ret void // HIP-NEXT: ret void
// HIP-NEXT: } // HIP-NEXT: }
// //
// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" { // HIP: define internal void @.hip.fatbin_unreg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry: // HIP-NEXT: entry:
// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 // HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0) // HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
// HIP-NEXT: ret void // HIP-NEXT: ret void
// HIP-NEXT: } // HIP-NEXT: }
// //
// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" { // HIP: define internal void @.hip.globals_reg(ptr %0) section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry: // HIP-NEXT: entry:
// HIP-NEXT: %1 = icmp ne ptr @__start_llvm_offload_entries, @__stop_llvm_offload_entries // HIP-NEXT: %1 = icmp ne ptr @{{.*offload_entries.*}}, @{{.*offload_entries.*}}
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end // HIP-NEXT: br i1 %1, label %while.entry, label %while.end
// //
// HIP: while.entry: // HIP: while.entry:
// HIP-NEXT: %entry1 = phi ptr [ @__start_llvm_offload_entries, %entry ], [ %16, %if.end ] // HIP-NEXT: %entry1 = phi ptr [ @{{.*offload_entries.*}}, %entry ], [ %16, %if.end ]
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4 // HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// HIP-NEXT: %addr = load ptr, ptr %2, align 8 // HIP-NEXT: %addr = load ptr, ptr %2, align 8
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8 // HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
@@ -258,7 +272,7 @@
// //
// HIP: if.end: // HIP: if.end:
// HIP-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 // HIP-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// HIP-NEXT: %17 = icmp eq ptr %16, @__stop_llvm_offload_entries // HIP-NEXT: %17 = icmp eq ptr %16, @{{.*offload_entries.*}}
// HIP-NEXT: br i1 %17, label %while.end, label %while.entry // HIP-NEXT: br i1 %17, label %while.end, label %while.entry
// //
// HIP: while.end: // HIP: while.end:

View File

@@ -84,12 +84,16 @@ LLVM_ABI StructType *getEntryTy(Module &M);
/// \param Data Extra data storage associated with the entry. /// \param Data Extra data storage associated with the entry.
/// \param SectionName The section this entry will be placed at. /// \param SectionName The section this entry will be placed at.
/// \param AuxAddr An extra pointer if needed. /// \param AuxAddr An extra pointer if needed.
/// Returns the section name for offloading entries based on the target triple.
/// ELF: "llvm_offload_entries", COFF: "llvm_offload_entries",
/// Mach-O: "__LLVM,offload_entries".
LLVM_ABI StringRef getOffloadEntrySection(Module &M);
/// \return The emitted global variable containing the offloading entry. /// \return The emitted global variable containing the offloading entry.
LLVM_ABI GlobalVariable * LLVM_ABI GlobalVariable *
emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr,
StringRef Name, uint64_t Size, uint32_t Flags, StringRef Name, uint64_t Size, uint32_t Flags,
uint64_t Data, Constant *AuxAddr = nullptr, uint64_t Data, Constant *AuxAddr = nullptr);
StringRef SectionName = "llvm_offload_entries");
/// Create a constant struct initializer used to register this global at /// Create a constant struct initializer used to register this global at
/// runtime. /// runtime.
@@ -102,7 +106,7 @@ getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
/// Creates a pair of globals used to iterate the array of offloading entries by /// Creates a pair of globals used to iterate the array of offloading entries by
/// accessing the section variables provided by the linker. /// accessing the section variables provided by the linker.
LLVM_ABI std::pair<GlobalVariable *, GlobalVariable *> LLVM_ABI std::pair<GlobalVariable *, GlobalVariable *>
getOffloadEntryArray(Module &M, StringRef SectionName = "llvm_offload_entries"); getOffloadEntryArray(Module &M);
namespace amdgpu { namespace amdgpu {
/// Check if an image is compatible with current system's environment. The /// Check if an image is compatible with current system's environment. The

View File

@@ -44,6 +44,12 @@ IntegerType *getSizeTTy(Module &M) {
return M.getDataLayout().getIntPtrType(M.getContext()); return M.getDataLayout().getIntPtrType(M.getContext());
} }
/// Returns the appropriate startup section for registration functions.
/// Mach-O uses "__TEXT,__StaticInit"; ELF/COFF use ".text.startup".
StringRef getStartupSection(const Triple &T) {
return T.isOSBinFormatMachO() ? "__TEXT,__StaticInit" : ".text.startup";
}
// struct __tgt_device_image { // struct __tgt_device_image {
// void *ImageStart; // void *ImageStart;
// void *ImageEnd; // void *ImageEnd;
@@ -207,7 +213,7 @@ Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
auto *Func = auto *Func =
Function::Create(FuncTy, GlobalValue::InternalLinkage, Function::Create(FuncTy, GlobalValue::InternalLinkage,
".omp_offloading.descriptor_unreg" + Suffix, &M); ".omp_offloading.descriptor_unreg" + Suffix, &M);
Func->setSection(".text.startup"); Func->setSection(getStartupSection(M.getTargetTriple()));
// Get __tgt_unregister_lib function declaration. // Get __tgt_unregister_lib function declaration.
auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -229,7 +235,7 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
".omp_offloading.descriptor_reg" + Suffix, &M); ".omp_offloading.descriptor_reg" + Suffix, &M);
Func->setSection(".text.startup"); Func->setSection(getStartupSection(M.getTargetTriple()));
// Get __tgt_register_lib function declaration. // Get __tgt_register_lib function declaration.
auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -285,7 +291,7 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
// Create the global string containing the fatbinary. // Create the global string containing the fatbinary.
StringRef FatbinConstantSection = StringRef FatbinConstantSection =
IsHIP ? ".hip_fatbin" IsHIP ? (Triple.isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin")
: (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin"); : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
auto *Data = ConstantDataArray::get(C, Image); auto *Data = ConstantDataArray::get(C, Image);
auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true, auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
@@ -294,9 +300,9 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
Fatbin->setSection(FatbinConstantSection); Fatbin->setSection(FatbinConstantSection);
// Create the fatbinary wrapper // Create the fatbinary wrapper
StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment" StringRef FatbinWrapperSection =
: Triple.isMacOSX() ? "__NV_CUDA,__fatbin" IsHIP ? (Triple.isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment")
: ".nvFatBinSegment"; : (Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment");
Constant *FatbinWrapper[] = { Constant *FatbinWrapper[] = {
ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic), ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
ConstantInt::get(Type::getInt32Ty(C), 1), ConstantInt::get(Type::getInt32Ty(C), 1),
@@ -403,7 +409,7 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
auto *RegGlobalsFn = auto *RegGlobalsFn =
Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage, Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M); IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
RegGlobalsFn->setSection(".text.startup"); RegGlobalsFn->setSection(getStartupSection(M.getTargetTriple()));
// Create the loop to register all the entries. // Create the loop to register all the entries.
IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn)); IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
@@ -559,13 +565,13 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
auto *CtorFunc = Function::Create( auto *CtorFunc = Function::Create(
CtorFuncTy, GlobalValue::InternalLinkage, CtorFuncTy, GlobalValue::InternalLinkage,
(IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M); (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
CtorFunc->setSection(".text.startup"); CtorFunc->setSection(getStartupSection(M.getTargetTriple()));
auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *DtorFunc = Function::Create( auto *DtorFunc = Function::Create(
DtorFuncTy, GlobalValue::InternalLinkage, DtorFuncTy, GlobalValue::InternalLinkage,
(IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M); (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
DtorFunc->setSection(".text.startup"); DtorFunc->setSection(getStartupSection(M.getTargetTriple()));
auto *PtrTy = PointerType::getUnqual(C); auto *PtrTy = PointerType::getUnqual(C);
@@ -655,7 +661,7 @@ public:
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
Twine("sycl") + ".descriptor_reg", &M); Twine("sycl") + ".descriptor_reg", &M);
Func->setSection(".text.startup"); Func->setSection(getStartupSection(M.getTargetTriple()));
PointerType *PtrTy = PointerType::getUnqual(C); PointerType *PtrTy = PointerType::getUnqual(C);
IntegerType *Int64Ty = Type::getInt64Ty(C); IntegerType *Int64Ty = Type::getInt64Ty(C);
@@ -677,7 +683,7 @@ public:
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg", &M); "sycl.descriptor_unreg", &M);
Func->setSection(".text.startup"); Func->setSection(getStartupSection(M.getTargetTriple()));
PointerType *PtrTy = PointerType::getUnqual(C); PointerType *PtrTy = PointerType::getUnqual(C);
IntegerType *Int64Ty = Type::getInt64Ty(C); IntegerType *Int64Ty = Type::getInt64Ty(C);

View File

@@ -84,12 +84,29 @@ offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
return {EntryInitializer, Str}; return {EntryInitializer, Str};
} }
GlobalVariable * StringRef offloading::getOffloadEntrySection(Module &M) {
offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind, return M.getTargetTriple().isOSBinFormatMachO() ? "__LLVM,offload_entries"
Constant *Addr, StringRef Name, uint64_t Size, : "llvm_offload_entries";
uint32_t Flags, uint64_t Data, }
Constant *AuxAddr, StringRef SectionName) {
/// Returns the start/end symbol names for iterating offloading entries in a
/// given section. Mach-O uses \1section$start$/\1section$end$ convention;
/// ELF/COFF use __start_/__stop_ prefixes.
static std::pair<std::string, std::string>
getOffloadEntryBoundarySymbols(const Triple &T, StringRef SectionName) {
if (T.isOSBinFormatMachO()) {
std::string SymSection = SectionName.str();
std::replace(SymSection.begin(), SymSection.end(), ',', '$');
return {"\1section$start$" + SymSection, "\1section$end$" + SymSection};
}
return {("__start_" + SectionName).str(), ("__stop_" + SectionName).str()};
}
GlobalVariable *offloading::emitOffloadingEntry(
Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name,
uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr) {
const llvm::Triple &Triple = M.getTargetTriple(); const llvm::Triple &Triple = M.getTargetTriple();
StringRef SectionName = getOffloadEntrySection(M);
auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer( auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
M, Kind, Addr, Name, Size, Flags, Data, AuxAddr); M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
@@ -112,8 +129,9 @@ offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
} }
std::pair<GlobalVariable *, GlobalVariable *> std::pair<GlobalVariable *, GlobalVariable *>
offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { offloading::getOffloadEntryArray(Module &M) {
const llvm::Triple &Triple = M.getTargetTriple(); const llvm::Triple &Triple = M.getTargetTriple();
StringRef SectionName = getOffloadEntrySection(M);
auto *ZeroInitilaizer = auto *ZeroInitilaizer =
ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
@@ -122,13 +140,14 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
: GlobalValue::ExternalLinkage; : GlobalValue::ExternalLinkage;
auto *EntriesB = auto [StartName, StopName] =
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, getOffloadEntryBoundarySymbols(Triple, SectionName);
"__start_" + SectionName);
auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
Linkage, EntryInit, StartName);
EntriesB->setVisibility(GlobalValue::HiddenVisibility); EntriesB->setVisibility(GlobalValue::HiddenVisibility);
auto *EntriesE = auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true,
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, Linkage, EntryInit, StopName);
"__stop_" + SectionName);
EntriesE->setVisibility(GlobalValue::HiddenVisibility); EntriesE->setVisibility(GlobalValue::HiddenVisibility);
if (Triple.isOSBinFormatELF()) { if (Triple.isOSBinFormatELF()) {
@@ -142,6 +161,15 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
DummyEntry->setSection(SectionName); DummyEntry->setSection(SectionName);
DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment())); DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
appendToCompilerUsed(M, DummyEntry); appendToCompilerUsed(M, DummyEntry);
} else if (Triple.isOSBinFormatMachO()) {
// Mach-O needs a dummy variable in the section (like ELF) to ensure the
// linker provides the section boundary symbols.
auto *DummyEntry = new GlobalVariable(
M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
ZeroInitilaizer, "__dummy." + SectionName);
DummyEntry->setSection(SectionName);
DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
appendToCompilerUsed(M, DummyEntry);
} else { } else {
// The COFF linker will merge sections containing a '$' together into a // The COFF linker will merge sections containing a '$' together into a
// single section. The order of entries in this section will be sorted // single section. The order of entries in this section will be sorted

View File

@@ -51,6 +51,34 @@
; HIP-NEXT: ret void ; HIP-NEXT: ret void
; HIP-NEXT: } ; HIP-NEXT: }
; RUN: llvm-offload-wrapper --triple=x86_64-apple-macosx10.15.0 -kind=hip %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP-MACHO
; HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
; HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
; HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries", align 8
; HIP-MACHO-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @"__dummy.__LLVM,offload_entries"], section "llvm.metadata"
; HIP-MACHO-NEXT: @.fatbin_image = internal constant {{.*}}, section "__HIP,__hip_fatbin"
; HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
; HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
; HIP-MACHO-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
; HIP-MACHO: define internal void @.hip.fatbin_reg() section "__TEXT,__StaticInit" {
; HIP-MACHO-NEXT: entry:
; HIP-MACHO-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
; HIP-MACHO-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
; HIP-MACHO-NEXT: call void @.hip.globals_reg(ptr %0)
; HIP-MACHO-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
; HIP-MACHO-NEXT: ret void
; HIP-MACHO-NEXT: }
; HIP-MACHO: define internal void @.hip.fatbin_unreg() section "__TEXT,__StaticInit" {
; HIP-MACHO-NEXT: entry:
; HIP-MACHO-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
; HIP-MACHO-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
; HIP-MACHO-NEXT: ret void
; HIP-MACHO-NEXT: }
; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc ; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA ; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA