[HIP][MacOS] Mach-O support and Darwin toolchain fixes (#183991)

This PR adds support for HIP on macOS: Mach-O section naming, Darwin
host toolchain initialization guards, and HIPSPV behavior when Darwin is
the host.

This has been verified using chipStar on MacOS via the PoCL OpenCL
implementation.

## Uninitialized target workaround
Darwin’s toolchain is only initialized when its own TranslateArgs runs.
For HIP/CUDA device jobs, Darwin is used as the HostTC and never gets
its args translated, so its target stays uninitialized. The new checks
avoid asserting on that uninitialized state. A better long-term fix is
to initialize Darwin earlier (see the FIXME in Driver.cpp
BuildJobsForAction).

- [ ] Initialize Darwin toolchain during construction instead of lazily
in TranslateArgs. See Driver.cpp BuildJobsForAction FIXME.

- [x] In Darwin’s addClangTargetOptions, skip host-stdlib flags when
DeviceOffloadKind != OFK_None so HIPSPV can safely delegate to the host.
This commit is contained in:
Paulius Velesko
2026-04-28 20:43:59 +03:00
committed by GitHub
parent dddd0da8e6
commit 264ac2d3af
11 changed files with 263 additions and 43 deletions

View File

@@ -817,10 +817,14 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
llvm::Constant *FatBinStr;
unsigned FatMagic;
if (IsHIP) {
FatbinConstantName = ".hip_fatbin";
FatbinSectionName = ".hipFatBinSegment";
// On macOS (Mach-O), section names must be in "segment,section" format.
FatbinConstantName =
CGM.getTriple().isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin";
FatbinSectionName =
CGM.getTriple().isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment";
ModuleIDSectionName = "__hip_module_id";
ModuleIDSectionName =
CGM.getTriple().isMacOSX() ? "__HIP,__module_id" : "__hip_module_id";
ModuleIDPrefix = "__hip_";
if (CudaGpuBinary) {

View File

@@ -1140,6 +1140,50 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const {
Darwin::~Darwin() {}
void Darwin::ensureTargetInitialized() const {
if (TargetInitialized)
return;
llvm::Triple::OSType OS = getTriple().getOS();
DarwinPlatformKind Platform;
switch (OS) {
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
Platform = MacOS;
break;
case llvm::Triple::IOS:
Platform = IPhoneOS;
break;
case llvm::Triple::TvOS:
Platform = TvOS;
break;
case llvm::Triple::WatchOS:
Platform = WatchOS;
break;
case llvm::Triple::XROS:
Platform = XROS;
break;
case llvm::Triple::DriverKit:
Platform = DriverKit;
break;
default:
// Unknown platform; leave uninitialized.
return;
}
DarwinEnvironmentKind Environment = NativeEnvironment;
if (getTriple().isSimulatorEnvironment())
Environment = Simulator;
else if (getTriple().isMacCatalystEnvironment())
Environment = MacCatalyst;
VersionTuple OsVer = getTriple().getOSVersion();
setTarget(Platform, Environment, OsVer.getMajor(),
OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
VersionTuple());
}
AppleMachO::~AppleMachO() {}
MachO::~MachO() {}
@@ -1182,7 +1226,11 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
// If the target isn't initialized (e.g., an unknown Darwin platform, return
// the default triple).
// the default triple). Note: we intentionally do NOT call
// ensureTargetInitialized() here because this method is called before
// AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name),
// and lazy init with version 0.0.0 would conflict with the real version
// that AddDeploymentTarget() later sets via setTarget().
if (!isTargetInitialized())
return Triple.getTriple();
@@ -1248,6 +1296,11 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
CC1Args.push_back("-Werror=undef-prefix");
// For modern targets, promote certain warnings to errors.
// Lazily initialize the target if needed (e.g. when Darwin is used as
// a host toolchain for device offloading).
ensureTargetInitialized();
if (!isTargetInitialized())
return;
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
// Always enable -Wdeprecated-objc-isa-usage and promote it
// to an error.
@@ -3399,6 +3452,12 @@ void Darwin::addClangTargetOptions(
MachO::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind);
// When compiling device code (e.g. SPIR-V for HIP), skip host-specific
// flags like -faligned-alloc-unavailable and -fno-sized-deallocation
// that depend on the host OS version and are irrelevant to device code.
if (DeviceOffloadKind != Action::OFK_None)
return;
// Pass "-faligned-alloc-unavailable" only when the user hasn't manually
// enabled or disabled aligned allocations.
if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation,
@@ -3939,6 +3998,9 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
}
void Darwin::CheckObjCARC() const {
ensureTargetInitialized();
if (!isTargetInitialized())
return;
if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() ||
(isTargetMacOSBased() && !isMacosxVersionLT(10, 6)))
return;
@@ -3958,6 +4020,9 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
Res |= SanitizerKind::FuzzerNoLink;
Res |= SanitizerKind::ObjCCast;
ensureTargetInitialized();
if (!isTargetInitialized())
return Res;
// Prior to 10.9, macOS shipped a version of the C++ standard library without
// C++11 support. The same is true of iOS prior to version 5. These OS'es are
// incompatible with -fsanitize=vptr.

View File

@@ -391,6 +391,12 @@ private:
void VerifyTripleForSDK(const llvm::opt::ArgList &Args,
const llvm::Triple Triple) const;
protected:
/// Lazily initialize the target platform from the triple when
/// AddDeploymentTarget has not run yet (e.g. when Darwin is used as
/// a host toolchain for device offloading).
void ensureTargetInitialized() const;
public:
Darwin(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);

View File

@@ -409,9 +409,11 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
ObjStream << "# *** Automatically generated by Clang ***\n";
if (FoundPrimaryGpuBinHandleSymbol) {
// Define the first gpubin handle symbol
if (HostTriple.isWindowsMSVCEnvironment())
if (HostTriple.isWindowsMSVCEnvironment()) {
ObjStream << " .section .hip_gpubin_handle,\"dw\"\n";
else {
} else if (HostTriple.isMacOSX()) {
ObjStream << " .section __HIP,__gpubin_handle\n";
} else {
ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n";
ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ",@object\n";
ObjStream << " .section .hip_gpubin_handle,\"aw\"\n";
@@ -430,9 +432,12 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
}
if (FoundPrimaryHipFatbinSymbol) {
// Define the first fatbin symbol
if (HostTriple.isWindowsMSVCEnvironment())
if (HostTriple.isWindowsMSVCEnvironment()) {
ObjStream << " .section .hip_fatbin,\"dw\"\n";
else {
} else if (HostTriple.isMacOSX()) {
// Mach-O requires "segment,section" format
ObjStream << " .section __HIP,__hip_fatbin\n";
} else {
ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n";
ObjStream << " .type " << PrimaryHipFatbinSymbol << ",@object\n";
ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";

View File

@@ -0,0 +1,28 @@
// Verify that HIP fat binary sections use Mach-O "segment,section" format on Darwin.
// RUN: echo -n "GPU binary would be here." > %t
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip \
// RUN: | FileCheck %s --check-prefix=HIPEF
// RUN: %clang_cc1 -cuid=123 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
// RUN: -o - -x hip \
// RUN: | FileCheck %s --check-prefix=HIPNEF
#include "Inputs/cuda.h"
__device__ int device_var;
__constant__ int constant_var;
// When fat binary is embedded, section names use Mach-O format.
// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}section "__HIP,__hip_fatbin"{{.*}}align 4096
// HIPEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
// HIPEF-SAME: section "__HIP,__fatbin"
// When fat binary is external (no -fcuda-include-gpubinary), external symbol uses Mach-O section.
// HIPNEF: @[[FATBIN:__hip_fatbin_[0-9a-f]+]] = external constant i8, section "__HIP,__hip_fatbin"
// HIPNEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
// HIPNEF-SAME: section "__HIP,__fatbin"
__global__ void kernelfunc(int i, int j, int k) {}
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }

View File

@@ -11,6 +11,9 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP-COFF %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-macosx10.15.0 -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP-MACHO %s
#include "Inputs/cuda.h"
@@ -75,6 +78,21 @@
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE"
//.
// HIP-MACHO: @managed.managed = global i32 0, align 4
// HIP-MACHO: @managed = externally_initialized global ptr null
// HIP-MACHO: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "__LLVM,offload_entries"
// HIP-MACHO: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// HIP-MACHO: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "__LLVM,offload_entries"
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
@@ -103,6 +121,13 @@
// HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void
//
// HIP-MACHO-LABEL: @_Z18__device_stub__foov(
// HIP-MACHO-NEXT: entry:
// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
// HIP-MACHO: setup.end:
// HIP-MACHO-NEXT: ret void
//
__global__ void foo() {}
__device__ int var = 1;
const __device__ int constant = 1;
@@ -137,6 +162,13 @@ __device__ __managed__ int managed = 0;
// HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void
//
// HIP-MACHO-LABEL: @_Z21__device_stub__kernelv(
// HIP-MACHO-NEXT: entry:
// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
// HIP-MACHO: setup.end:
// HIP-MACHO-NEXT: ret void
//
__global__ void kernel() { external = 1; }
surface<void> surf;

View File

@@ -162,6 +162,8 @@
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-apple-macosx10.15.0 \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-MACHO
// HIP-ELF: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-ELF-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
@@ -170,13 +172,25 @@
// HIP-COFF: @__start_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OA"
// HIP-COFF-NEXT: @__stop_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OZ"
// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-NEXT: @.hip.binary_handle = internal global ptr null
// HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
// HIP-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-ELF-NEXT: @.hip.binary_handle = internal global ptr null
// HIP-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-COFF-NEXT: @.hip.binary_handle = internal global ptr null
// HIP-MACHO: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section "__HIP,__hip_fatbin"
// HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
// HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
// HIP: define internal void @.hip.fatbin_reg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
@@ -185,20 +199,20 @@
// HIP-NEXT: ret void
// HIP-NEXT: }
//
// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
// HIP: define internal void @.hip.fatbin_unreg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
// HIP-NEXT: ret void
// HIP-NEXT: }
//
// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
// HIP: define internal void @.hip.globals_reg(ptr %0) section "{{\.text\.startup|__TEXT,__StaticInit}}" {
// HIP-NEXT: entry:
// HIP-NEXT: %1 = icmp ne ptr @__start_llvm_offload_entries, @__stop_llvm_offload_entries
// HIP-NEXT: %1 = icmp ne ptr @{{.*offload_entries.*}}, @{{.*offload_entries.*}}
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end
//
// HIP: while.entry:
// HIP-NEXT: %entry1 = phi ptr [ @__start_llvm_offload_entries, %entry ], [ %16, %if.end ]
// HIP-NEXT: %entry1 = phi ptr [ @{{.*offload_entries.*}}, %entry ], [ %16, %if.end ]
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// HIP-NEXT: %addr = load ptr, ptr %2, align 8
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
@@ -258,7 +272,7 @@
//
// HIP: if.end:
// HIP-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// HIP-NEXT: %17 = icmp eq ptr %16, @__stop_llvm_offload_entries
// HIP-NEXT: %17 = icmp eq ptr %16, @{{.*offload_entries.*}}
// HIP-NEXT: br i1 %17, label %while.end, label %while.entry
//
// HIP: while.end:

View File

@@ -84,12 +84,16 @@ LLVM_ABI StructType *getEntryTy(Module &M);
/// \param Data Extra data storage associated with the entry.
/// \param SectionName The section this entry will be placed at.
/// \param AuxAddr An extra pointer if needed.
/// Returns the section name for offloading entries based on the target triple.
/// ELF: "llvm_offload_entries", COFF: "llvm_offload_entries",
/// Mach-O: "__LLVM,offload_entries".
LLVM_ABI StringRef getOffloadEntrySection(Module &M);
/// \return The emitted global variable containing the offloading entry.
LLVM_ABI GlobalVariable *
emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr,
StringRef Name, uint64_t Size, uint32_t Flags,
uint64_t Data, Constant *AuxAddr = nullptr,
StringRef SectionName = "llvm_offload_entries");
uint64_t Data, Constant *AuxAddr = nullptr);
/// Create a constant struct initializer used to register this global at
/// runtime.
@@ -102,7 +106,7 @@ getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
/// Creates a pair of globals used to iterate the array of offloading entries by
/// accessing the section variables provided by the linker.
LLVM_ABI std::pair<GlobalVariable *, GlobalVariable *>
getOffloadEntryArray(Module &M, StringRef SectionName = "llvm_offload_entries");
getOffloadEntryArray(Module &M);
namespace amdgpu {
/// Check if an image is compatible with current system's environment. The

View File

@@ -44,6 +44,12 @@ IntegerType *getSizeTTy(Module &M) {
return M.getDataLayout().getIntPtrType(M.getContext());
}
/// Returns the appropriate startup section for registration functions.
/// Mach-O uses "__TEXT,__StaticInit"; ELF/COFF use ".text.startup".
StringRef getStartupSection(const Triple &T) {
return T.isOSBinFormatMachO() ? "__TEXT,__StaticInit" : ".text.startup";
}
// struct __tgt_device_image {
// void *ImageStart;
// void *ImageEnd;
@@ -207,7 +213,7 @@ Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
auto *Func =
Function::Create(FuncTy, GlobalValue::InternalLinkage,
".omp_offloading.descriptor_unreg" + Suffix, &M);
Func->setSection(".text.startup");
Func->setSection(getStartupSection(M.getTargetTriple()));
// Get __tgt_unregister_lib function declaration.
auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -229,7 +235,7 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
".omp_offloading.descriptor_reg" + Suffix, &M);
Func->setSection(".text.startup");
Func->setSection(getStartupSection(M.getTargetTriple()));
// Get __tgt_register_lib function declaration.
auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
@@ -285,7 +291,7 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
// Create the global string containing the fatbinary.
StringRef FatbinConstantSection =
IsHIP ? ".hip_fatbin"
IsHIP ? (Triple.isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin")
: (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
auto *Data = ConstantDataArray::get(C, Image);
auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
@@ -294,9 +300,9 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
Fatbin->setSection(FatbinConstantSection);
// Create the fatbinary wrapper
StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
: Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
: ".nvFatBinSegment";
StringRef FatbinWrapperSection =
IsHIP ? (Triple.isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment")
: (Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment");
Constant *FatbinWrapper[] = {
ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
ConstantInt::get(Type::getInt32Ty(C), 1),
@@ -403,7 +409,7 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
auto *RegGlobalsFn =
Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
RegGlobalsFn->setSection(".text.startup");
RegGlobalsFn->setSection(getStartupSection(M.getTargetTriple()));
// Create the loop to register all the entries.
IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
@@ -559,13 +565,13 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
auto *CtorFunc = Function::Create(
CtorFuncTy, GlobalValue::InternalLinkage,
(IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
CtorFunc->setSection(".text.startup");
CtorFunc->setSection(getStartupSection(M.getTargetTriple()));
auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *DtorFunc = Function::Create(
DtorFuncTy, GlobalValue::InternalLinkage,
(IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
DtorFunc->setSection(".text.startup");
DtorFunc->setSection(getStartupSection(M.getTargetTriple()));
auto *PtrTy = PointerType::getUnqual(C);
@@ -655,7 +661,7 @@ public:
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
Twine("sycl") + ".descriptor_reg", &M);
Func->setSection(".text.startup");
Func->setSection(getStartupSection(M.getTargetTriple()));
PointerType *PtrTy = PointerType::getUnqual(C);
IntegerType *Int64Ty = Type::getInt64Ty(C);
@@ -677,7 +683,7 @@ public:
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg", &M);
Func->setSection(".text.startup");
Func->setSection(getStartupSection(M.getTargetTriple()));
PointerType *PtrTy = PointerType::getUnqual(C);
IntegerType *Int64Ty = Type::getInt64Ty(C);

View File

@@ -84,12 +84,29 @@ offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
return {EntryInitializer, Str};
}
GlobalVariable *
offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
Constant *Addr, StringRef Name, uint64_t Size,
uint32_t Flags, uint64_t Data,
Constant *AuxAddr, StringRef SectionName) {
StringRef offloading::getOffloadEntrySection(Module &M) {
return M.getTargetTriple().isOSBinFormatMachO() ? "__LLVM,offload_entries"
: "llvm_offload_entries";
}
/// Returns the start/end symbol names for iterating offloading entries in a
/// given section. Mach-O uses \1section$start$/\1section$end$ convention;
/// ELF/COFF use __start_/__stop_ prefixes.
static std::pair<std::string, std::string>
getOffloadEntryBoundarySymbols(const Triple &T, StringRef SectionName) {
if (T.isOSBinFormatMachO()) {
std::string SymSection = SectionName.str();
std::replace(SymSection.begin(), SymSection.end(), ',', '$');
return {"\1section$start$" + SymSection, "\1section$end$" + SymSection};
}
return {("__start_" + SectionName).str(), ("__stop_" + SectionName).str()};
}
GlobalVariable *offloading::emitOffloadingEntry(
Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name,
uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr) {
const llvm::Triple &Triple = M.getTargetTriple();
StringRef SectionName = getOffloadEntrySection(M);
auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
@@ -112,8 +129,9 @@ offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
}
std::pair<GlobalVariable *, GlobalVariable *>
offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
offloading::getOffloadEntryArray(Module &M) {
const llvm::Triple &Triple = M.getTargetTriple();
StringRef SectionName = getOffloadEntrySection(M);
auto *ZeroInitilaizer =
ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
@@ -122,13 +140,14 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
: GlobalValue::ExternalLinkage;
auto *EntriesB =
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
"__start_" + SectionName);
auto [StartName, StopName] =
getOffloadEntryBoundarySymbols(Triple, SectionName);
auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
Linkage, EntryInit, StartName);
EntriesB->setVisibility(GlobalValue::HiddenVisibility);
auto *EntriesE =
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
"__stop_" + SectionName);
auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true,
Linkage, EntryInit, StopName);
EntriesE->setVisibility(GlobalValue::HiddenVisibility);
if (Triple.isOSBinFormatELF()) {
@@ -142,6 +161,15 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
DummyEntry->setSection(SectionName);
DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
appendToCompilerUsed(M, DummyEntry);
} else if (Triple.isOSBinFormatMachO()) {
// Mach-O needs a dummy variable in the section (like ELF) to ensure the
// linker provides the section boundary symbols.
auto *DummyEntry = new GlobalVariable(
M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
ZeroInitilaizer, "__dummy." + SectionName);
DummyEntry->setSection(SectionName);
DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
appendToCompilerUsed(M, DummyEntry);
} else {
// The COFF linker will merge sections containing a '$' together into a
// single section. The order of entries in this section will be sorted

View File

@@ -51,6 +51,34 @@
; HIP-NEXT: ret void
; HIP-NEXT: }
; RUN: llvm-offload-wrapper --triple=x86_64-apple-macosx10.15.0 -kind=hip %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP-MACHO
; HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
; HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
; HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries", align 8
; HIP-MACHO-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @"__dummy.__LLVM,offload_entries"], section "llvm.metadata"
; HIP-MACHO-NEXT: @.fatbin_image = internal constant {{.*}}, section "__HIP,__hip_fatbin"
; HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
; HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
; HIP-MACHO-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
; HIP-MACHO: define internal void @.hip.fatbin_reg() section "__TEXT,__StaticInit" {
; HIP-MACHO-NEXT: entry:
; HIP-MACHO-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
; HIP-MACHO-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
; HIP-MACHO-NEXT: call void @.hip.globals_reg(ptr %0)
; HIP-MACHO-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
; HIP-MACHO-NEXT: ret void
; HIP-MACHO-NEXT: }
; HIP-MACHO: define internal void @.hip.fatbin_unreg() section "__TEXT,__StaticInit" {
; HIP-MACHO-NEXT: entry:
; HIP-MACHO-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
; HIP-MACHO-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
; HIP-MACHO-NEXT: ret void
; HIP-MACHO-NEXT: }
; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA