[HIP][MacOS] Mach-O support and Darwin toolchain fixes (#183991)
This PR adds support for HIP on macOS: Mach-O section naming, Darwin host toolchain initialization guards, and HIPSPV behavior when Darwin is the host. This has been verified using chipStar on MacOS via the PoCL OpenCL implementation. ## Uninitialized target workaround Darwin’s toolchain is only initialized when its own TranslateArgs runs. For HIP/CUDA device jobs, Darwin is used as the HostTC and never gets its args translated, so its target stays uninitialized. The new checks avoid asserting on that uninitialized state. A better long-term fix is to initialize Darwin earlier (see the FIXME in Driver.cpp BuildJobsForAction). - [ ] Initialize Darwin toolchain during construction instead of lazily in TranslateArgs. See Driver.cpp BuildJobsForAction FIXME. - [x] In Darwin’s addClangTargetOptions, skip host-stdlib flags when DeviceOffloadKind != OFK_None so HIPSPV can safely delegate to the host.
This commit is contained in:
@@ -817,10 +817,14 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
llvm::Constant *FatBinStr;
|
||||
unsigned FatMagic;
|
||||
if (IsHIP) {
|
||||
FatbinConstantName = ".hip_fatbin";
|
||||
FatbinSectionName = ".hipFatBinSegment";
|
||||
// On macOS (Mach-O), section names must be in "segment,section" format.
|
||||
FatbinConstantName =
|
||||
CGM.getTriple().isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin";
|
||||
FatbinSectionName =
|
||||
CGM.getTriple().isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment";
|
||||
|
||||
ModuleIDSectionName = "__hip_module_id";
|
||||
ModuleIDSectionName =
|
||||
CGM.getTriple().isMacOSX() ? "__HIP,__module_id" : "__hip_module_id";
|
||||
ModuleIDPrefix = "__hip_";
|
||||
|
||||
if (CudaGpuBinary) {
|
||||
|
||||
@@ -1140,6 +1140,50 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const {
|
||||
|
||||
Darwin::~Darwin() {}
|
||||
|
||||
void Darwin::ensureTargetInitialized() const {
|
||||
if (TargetInitialized)
|
||||
return;
|
||||
|
||||
llvm::Triple::OSType OS = getTriple().getOS();
|
||||
|
||||
DarwinPlatformKind Platform;
|
||||
switch (OS) {
|
||||
case llvm::Triple::Darwin:
|
||||
case llvm::Triple::MacOSX:
|
||||
Platform = MacOS;
|
||||
break;
|
||||
case llvm::Triple::IOS:
|
||||
Platform = IPhoneOS;
|
||||
break;
|
||||
case llvm::Triple::TvOS:
|
||||
Platform = TvOS;
|
||||
break;
|
||||
case llvm::Triple::WatchOS:
|
||||
Platform = WatchOS;
|
||||
break;
|
||||
case llvm::Triple::XROS:
|
||||
Platform = XROS;
|
||||
break;
|
||||
case llvm::Triple::DriverKit:
|
||||
Platform = DriverKit;
|
||||
break;
|
||||
default:
|
||||
// Unknown platform; leave uninitialized.
|
||||
return;
|
||||
}
|
||||
|
||||
DarwinEnvironmentKind Environment = NativeEnvironment;
|
||||
if (getTriple().isSimulatorEnvironment())
|
||||
Environment = Simulator;
|
||||
else if (getTriple().isMacCatalystEnvironment())
|
||||
Environment = MacCatalyst;
|
||||
|
||||
VersionTuple OsVer = getTriple().getOSVersion();
|
||||
setTarget(Platform, Environment, OsVer.getMajor(),
|
||||
OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0),
|
||||
VersionTuple());
|
||||
}
|
||||
|
||||
AppleMachO::~AppleMachO() {}
|
||||
|
||||
MachO::~MachO() {}
|
||||
@@ -1182,7 +1226,11 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
|
||||
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
|
||||
|
||||
// If the target isn't initialized (e.g., an unknown Darwin platform, return
|
||||
// the default triple).
|
||||
// the default triple). Note: we intentionally do NOT call
|
||||
// ensureTargetInitialized() here because this method is called before
|
||||
// AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name),
|
||||
// and lazy init with version 0.0.0 would conflict with the real version
|
||||
// that AddDeploymentTarget() later sets via setTarget().
|
||||
if (!isTargetInitialized())
|
||||
return Triple.getTriple();
|
||||
|
||||
@@ -1248,6 +1296,11 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
|
||||
CC1Args.push_back("-Werror=undef-prefix");
|
||||
|
||||
// For modern targets, promote certain warnings to errors.
|
||||
// Lazily initialize the target if needed (e.g. when Darwin is used as
|
||||
// a host toolchain for device offloading).
|
||||
ensureTargetInitialized();
|
||||
if (!isTargetInitialized())
|
||||
return;
|
||||
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
|
||||
// Always enable -Wdeprecated-objc-isa-usage and promote it
|
||||
// to an error.
|
||||
@@ -3399,6 +3452,12 @@ void Darwin::addClangTargetOptions(
|
||||
|
||||
MachO::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind);
|
||||
|
||||
// When compiling device code (e.g. SPIR-V for HIP), skip host-specific
|
||||
// flags like -faligned-alloc-unavailable and -fno-sized-deallocation
|
||||
// that depend on the host OS version and are irrelevant to device code.
|
||||
if (DeviceOffloadKind != Action::OFK_None)
|
||||
return;
|
||||
|
||||
// Pass "-faligned-alloc-unavailable" only when the user hasn't manually
|
||||
// enabled or disabled aligned allocations.
|
||||
if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation,
|
||||
@@ -3939,6 +3998,9 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
|
||||
}
|
||||
|
||||
void Darwin::CheckObjCARC() const {
|
||||
ensureTargetInitialized();
|
||||
if (!isTargetInitialized())
|
||||
return;
|
||||
if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() ||
|
||||
(isTargetMacOSBased() && !isMacosxVersionLT(10, 6)))
|
||||
return;
|
||||
@@ -3958,6 +4020,9 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
|
||||
Res |= SanitizerKind::FuzzerNoLink;
|
||||
Res |= SanitizerKind::ObjCCast;
|
||||
|
||||
ensureTargetInitialized();
|
||||
if (!isTargetInitialized())
|
||||
return Res;
|
||||
// Prior to 10.9, macOS shipped a version of the C++ standard library without
|
||||
// C++11 support. The same is true of iOS prior to version 5. These OS'es are
|
||||
// incompatible with -fsanitize=vptr.
|
||||
|
||||
@@ -391,6 +391,12 @@ private:
|
||||
void VerifyTripleForSDK(const llvm::opt::ArgList &Args,
|
||||
const llvm::Triple Triple) const;
|
||||
|
||||
protected:
|
||||
/// Lazily initialize the target platform from the triple when
|
||||
/// AddDeploymentTarget has not run yet (e.g. when Darwin is used as
|
||||
/// a host toolchain for device offloading).
|
||||
void ensureTargetInitialized() const;
|
||||
|
||||
public:
|
||||
Darwin(const Driver &D, const llvm::Triple &Triple,
|
||||
const llvm::opt::ArgList &Args);
|
||||
|
||||
@@ -409,9 +409,11 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
|
||||
ObjStream << "# *** Automatically generated by Clang ***\n";
|
||||
if (FoundPrimaryGpuBinHandleSymbol) {
|
||||
// Define the first gpubin handle symbol
|
||||
if (HostTriple.isWindowsMSVCEnvironment())
|
||||
if (HostTriple.isWindowsMSVCEnvironment()) {
|
||||
ObjStream << " .section .hip_gpubin_handle,\"dw\"\n";
|
||||
else {
|
||||
} else if (HostTriple.isMacOSX()) {
|
||||
ObjStream << " .section __HIP,__gpubin_handle\n";
|
||||
} else {
|
||||
ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n";
|
||||
ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ",@object\n";
|
||||
ObjStream << " .section .hip_gpubin_handle,\"aw\"\n";
|
||||
@@ -430,9 +432,12 @@ void HIP::constructGenerateObjFileFromHIPFatBinary(
|
||||
}
|
||||
if (FoundPrimaryHipFatbinSymbol) {
|
||||
// Define the first fatbin symbol
|
||||
if (HostTriple.isWindowsMSVCEnvironment())
|
||||
if (HostTriple.isWindowsMSVCEnvironment()) {
|
||||
ObjStream << " .section .hip_fatbin,\"dw\"\n";
|
||||
else {
|
||||
} else if (HostTriple.isMacOSX()) {
|
||||
// Mach-O requires "segment,section" format
|
||||
ObjStream << " .section __HIP,__hip_fatbin\n";
|
||||
} else {
|
||||
ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n";
|
||||
ObjStream << " .type " << PrimaryHipFatbinSymbol << ",@object\n";
|
||||
ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";
|
||||
|
||||
28
clang/test/CodeGenCUDA/device-stub-macho.cu
Normal file
28
clang/test/CodeGenCUDA/device-stub-macho.cu
Normal file
@@ -0,0 +1,28 @@
|
||||
// Verify that HIP fat binary sections use Mach-O "segment,section" format on Darwin.
|
||||
|
||||
// RUN: echo -n "GPU binary would be here." > %t
|
||||
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
|
||||
// RUN: -fcuda-include-gpubinary %t -o - -x hip \
|
||||
// RUN: | FileCheck %s --check-prefix=HIPEF
|
||||
// RUN: %clang_cc1 -cuid=123 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \
|
||||
// RUN: -o - -x hip \
|
||||
// RUN: | FileCheck %s --check-prefix=HIPNEF
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
__device__ int device_var;
|
||||
__constant__ int constant_var;
|
||||
|
||||
// When fat binary is embedded, section names use Mach-O format.
|
||||
// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}section "__HIP,__hip_fatbin"{{.*}}align 4096
|
||||
// HIPEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
|
||||
// HIPEF-SAME: section "__HIP,__fatbin"
|
||||
|
||||
// When fat binary is external (no -fcuda-include-gpubinary), external symbol uses Mach-O section.
|
||||
// HIPNEF: @[[FATBIN:__hip_fatbin_[0-9a-f]+]] = external constant i8, section "__HIP,__hip_fatbin"
|
||||
// HIPNEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr }
|
||||
// HIPNEF-SAME: section "__HIP,__fatbin"
|
||||
|
||||
__global__ void kernelfunc(int i, int j, int k) {}
|
||||
|
||||
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
|
||||
@@ -11,6 +11,9 @@
|
||||
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
|
||||
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
|
||||
// RUN: --check-prefix=HIP-COFF %s
|
||||
// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-macosx10.15.0 -fgpu-rdc \
|
||||
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
|
||||
// RUN: --check-prefix=HIP-MACHO %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
@@ -75,6 +78,21 @@
|
||||
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
|
||||
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE"
|
||||
//.
|
||||
// HIP-MACHO: @managed.managed = global i32 0, align 4
|
||||
// HIP-MACHO: @managed = externally_initialized global ptr null
|
||||
// HIP-MACHO: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
|
||||
// HIP-MACHO: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
|
||||
// HIP-MACHO: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
|
||||
// HIP-MACHO: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries"
|
||||
// HIP-MACHO: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
|
||||
// HIP-MACHO: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "__LLVM,offload_entries"
|
||||
// HIP-MACHO: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
|
||||
// HIP-MACHO: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "__LLVM,offload_entries"
|
||||
// HIP-MACHO: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
|
||||
// HIP-MACHO: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "__LLVM,offload_entries"
|
||||
// HIP-MACHO: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
|
||||
// HIP-MACHO: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "__LLVM,offload_entries"
|
||||
//.
|
||||
// CUDA-LABEL: @_Z18__device_stub__foov(
|
||||
// CUDA-NEXT: entry:
|
||||
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
|
||||
@@ -103,6 +121,13 @@
|
||||
// HIP-COFF: setup.end:
|
||||
// HIP-COFF-NEXT: ret void
|
||||
//
|
||||
// HIP-MACHO-LABEL: @_Z18__device_stub__foov(
|
||||
// HIP-MACHO-NEXT: entry:
|
||||
// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
|
||||
// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
|
||||
// HIP-MACHO: setup.end:
|
||||
// HIP-MACHO-NEXT: ret void
|
||||
//
|
||||
__global__ void foo() {}
|
||||
__device__ int var = 1;
|
||||
const __device__ int constant = 1;
|
||||
@@ -137,6 +162,13 @@ __device__ __managed__ int managed = 0;
|
||||
// HIP-COFF: setup.end:
|
||||
// HIP-COFF-NEXT: ret void
|
||||
//
|
||||
// HIP-MACHO-LABEL: @_Z21__device_stub__kernelv(
|
||||
// HIP-MACHO-NEXT: entry:
|
||||
// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
|
||||
// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]]
|
||||
// HIP-MACHO: setup.end:
|
||||
// HIP-MACHO-NEXT: ret void
|
||||
//
|
||||
__global__ void kernel() { external = 1; }
|
||||
|
||||
surface<void> surf;
|
||||
|
||||
@@ -162,6 +162,8 @@
|
||||
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
|
||||
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
|
||||
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF
|
||||
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-apple-macosx10.15.0 \
|
||||
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-MACHO
|
||||
|
||||
// HIP-ELF: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
|
||||
// HIP-ELF-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
|
||||
@@ -170,13 +172,25 @@
|
||||
// HIP-COFF: @__start_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OA"
|
||||
// HIP-COFF-NEXT: @__stop_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OZ"
|
||||
|
||||
// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
|
||||
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
|
||||
// HIP-NEXT: @.hip.binary_handle = internal global ptr null
|
||||
// HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
|
||||
// HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
|
||||
// HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries"
|
||||
|
||||
// HIP-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
|
||||
// HIP-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
|
||||
// HIP-ELF-NEXT: @.hip.binary_handle = internal global ptr null
|
||||
|
||||
// HIP-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
|
||||
// HIP-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
|
||||
// HIP-COFF-NEXT: @.hip.binary_handle = internal global ptr null
|
||||
|
||||
// HIP-MACHO: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section "__HIP,__hip_fatbin"
|
||||
// HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
|
||||
// HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
|
||||
|
||||
// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
|
||||
|
||||
// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
|
||||
// HIP: define internal void @.hip.fatbin_reg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
|
||||
// HIP-NEXT: entry:
|
||||
// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
|
||||
// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
|
||||
@@ -185,20 +199,20 @@
|
||||
// HIP-NEXT: ret void
|
||||
// HIP-NEXT: }
|
||||
//
|
||||
// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
|
||||
// HIP: define internal void @.hip.fatbin_unreg() section "{{\.text\.startup|__TEXT,__StaticInit}}" {
|
||||
// HIP-NEXT: entry:
|
||||
// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
|
||||
// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
|
||||
// HIP-NEXT: ret void
|
||||
// HIP-NEXT: }
|
||||
//
|
||||
// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
|
||||
// HIP: define internal void @.hip.globals_reg(ptr %0) section "{{\.text\.startup|__TEXT,__StaticInit}}" {
|
||||
// HIP-NEXT: entry:
|
||||
// HIP-NEXT: %1 = icmp ne ptr @__start_llvm_offload_entries, @__stop_llvm_offload_entries
|
||||
// HIP-NEXT: %1 = icmp ne ptr @{{.*offload_entries.*}}, @{{.*offload_entries.*}}
|
||||
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end
|
||||
//
|
||||
// HIP: while.entry:
|
||||
// HIP-NEXT: %entry1 = phi ptr [ @__start_llvm_offload_entries, %entry ], [ %16, %if.end ]
|
||||
// HIP-NEXT: %entry1 = phi ptr [ @{{.*offload_entries.*}}, %entry ], [ %16, %if.end ]
|
||||
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
|
||||
// HIP-NEXT: %addr = load ptr, ptr %2, align 8
|
||||
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
|
||||
@@ -258,7 +272,7 @@
|
||||
//
|
||||
// HIP: if.end:
|
||||
// HIP-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
|
||||
// HIP-NEXT: %17 = icmp eq ptr %16, @__stop_llvm_offload_entries
|
||||
// HIP-NEXT: %17 = icmp eq ptr %16, @{{.*offload_entries.*}}
|
||||
// HIP-NEXT: br i1 %17, label %while.end, label %while.entry
|
||||
//
|
||||
// HIP: while.end:
|
||||
|
||||
@@ -84,12 +84,16 @@ LLVM_ABI StructType *getEntryTy(Module &M);
|
||||
/// \param Data Extra data storage associated with the entry.
|
||||
/// \param SectionName The section this entry will be placed at.
|
||||
/// \param AuxAddr An extra pointer if needed.
|
||||
/// Returns the section name for offloading entries based on the target triple.
|
||||
/// ELF: "llvm_offload_entries", COFF: "llvm_offload_entries",
|
||||
/// Mach-O: "__LLVM,offload_entries".
|
||||
LLVM_ABI StringRef getOffloadEntrySection(Module &M);
|
||||
|
||||
/// \return The emitted global variable containing the offloading entry.
|
||||
LLVM_ABI GlobalVariable *
|
||||
emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr,
|
||||
StringRef Name, uint64_t Size, uint32_t Flags,
|
||||
uint64_t Data, Constant *AuxAddr = nullptr,
|
||||
StringRef SectionName = "llvm_offload_entries");
|
||||
uint64_t Data, Constant *AuxAddr = nullptr);
|
||||
|
||||
/// Create a constant struct initializer used to register this global at
|
||||
/// runtime.
|
||||
@@ -102,7 +106,7 @@ getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
|
||||
/// Creates a pair of globals used to iterate the array of offloading entries by
|
||||
/// accessing the section variables provided by the linker.
|
||||
LLVM_ABI std::pair<GlobalVariable *, GlobalVariable *>
|
||||
getOffloadEntryArray(Module &M, StringRef SectionName = "llvm_offload_entries");
|
||||
getOffloadEntryArray(Module &M);
|
||||
|
||||
namespace amdgpu {
|
||||
/// Check if an image is compatible with current system's environment. The
|
||||
|
||||
@@ -44,6 +44,12 @@ IntegerType *getSizeTTy(Module &M) {
|
||||
return M.getDataLayout().getIntPtrType(M.getContext());
|
||||
}
|
||||
|
||||
/// Returns the appropriate startup section for registration functions.
|
||||
/// Mach-O uses "__TEXT,__StaticInit"; ELF/COFF use ".text.startup".
|
||||
StringRef getStartupSection(const Triple &T) {
|
||||
return T.isOSBinFormatMachO() ? "__TEXT,__StaticInit" : ".text.startup";
|
||||
}
|
||||
|
||||
// struct __tgt_device_image {
|
||||
// void *ImageStart;
|
||||
// void *ImageEnd;
|
||||
@@ -207,7 +213,7 @@ Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
|
||||
auto *Func =
|
||||
Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
".omp_offloading.descriptor_unreg" + Suffix, &M);
|
||||
Func->setSection(".text.startup");
|
||||
Func->setSection(getStartupSection(M.getTargetTriple()));
|
||||
|
||||
// Get __tgt_unregister_lib function declaration.
|
||||
auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
|
||||
@@ -229,7 +235,7 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
|
||||
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
".omp_offloading.descriptor_reg" + Suffix, &M);
|
||||
Func->setSection(".text.startup");
|
||||
Func->setSection(getStartupSection(M.getTargetTriple()));
|
||||
|
||||
// Get __tgt_register_lib function declaration.
|
||||
auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
|
||||
@@ -285,7 +291,7 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
|
||||
|
||||
// Create the global string containing the fatbinary.
|
||||
StringRef FatbinConstantSection =
|
||||
IsHIP ? ".hip_fatbin"
|
||||
IsHIP ? (Triple.isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin")
|
||||
: (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
|
||||
auto *Data = ConstantDataArray::get(C, Image);
|
||||
auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
|
||||
@@ -294,9 +300,9 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
|
||||
Fatbin->setSection(FatbinConstantSection);
|
||||
|
||||
// Create the fatbinary wrapper
|
||||
StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
|
||||
: Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
|
||||
: ".nvFatBinSegment";
|
||||
StringRef FatbinWrapperSection =
|
||||
IsHIP ? (Triple.isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment")
|
||||
: (Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment");
|
||||
Constant *FatbinWrapper[] = {
|
||||
ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
|
||||
ConstantInt::get(Type::getInt32Ty(C), 1),
|
||||
@@ -403,7 +409,7 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
|
||||
auto *RegGlobalsFn =
|
||||
Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
|
||||
IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
|
||||
RegGlobalsFn->setSection(".text.startup");
|
||||
RegGlobalsFn->setSection(getStartupSection(M.getTargetTriple()));
|
||||
|
||||
// Create the loop to register all the entries.
|
||||
IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
|
||||
@@ -559,13 +565,13 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
|
||||
auto *CtorFunc = Function::Create(
|
||||
CtorFuncTy, GlobalValue::InternalLinkage,
|
||||
(IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
|
||||
CtorFunc->setSection(".text.startup");
|
||||
CtorFunc->setSection(getStartupSection(M.getTargetTriple()));
|
||||
|
||||
auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
auto *DtorFunc = Function::Create(
|
||||
DtorFuncTy, GlobalValue::InternalLinkage,
|
||||
(IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
|
||||
DtorFunc->setSection(".text.startup");
|
||||
DtorFunc->setSection(getStartupSection(M.getTargetTriple()));
|
||||
|
||||
auto *PtrTy = PointerType::getUnqual(C);
|
||||
|
||||
@@ -655,7 +661,7 @@ public:
|
||||
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
Twine("sycl") + ".descriptor_reg", &M);
|
||||
Func->setSection(".text.startup");
|
||||
Func->setSection(getStartupSection(M.getTargetTriple()));
|
||||
|
||||
PointerType *PtrTy = PointerType::getUnqual(C);
|
||||
IntegerType *Int64Ty = Type::getInt64Ty(C);
|
||||
@@ -677,7 +683,7 @@ public:
|
||||
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
"sycl.descriptor_unreg", &M);
|
||||
Func->setSection(".text.startup");
|
||||
Func->setSection(getStartupSection(M.getTargetTriple()));
|
||||
|
||||
PointerType *PtrTy = PointerType::getUnqual(C);
|
||||
IntegerType *Int64Ty = Type::getInt64Ty(C);
|
||||
|
||||
@@ -84,12 +84,29 @@ offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
|
||||
return {EntryInitializer, Str};
|
||||
}
|
||||
|
||||
GlobalVariable *
|
||||
offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
|
||||
Constant *Addr, StringRef Name, uint64_t Size,
|
||||
uint32_t Flags, uint64_t Data,
|
||||
Constant *AuxAddr, StringRef SectionName) {
|
||||
StringRef offloading::getOffloadEntrySection(Module &M) {
|
||||
return M.getTargetTriple().isOSBinFormatMachO() ? "__LLVM,offload_entries"
|
||||
: "llvm_offload_entries";
|
||||
}
|
||||
|
||||
/// Returns the start/end symbol names for iterating offloading entries in a
|
||||
/// given section. Mach-O uses \1section$start$/\1section$end$ convention;
|
||||
/// ELF/COFF use __start_/__stop_ prefixes.
|
||||
static std::pair<std::string, std::string>
|
||||
getOffloadEntryBoundarySymbols(const Triple &T, StringRef SectionName) {
|
||||
if (T.isOSBinFormatMachO()) {
|
||||
std::string SymSection = SectionName.str();
|
||||
std::replace(SymSection.begin(), SymSection.end(), ',', '$');
|
||||
return {"\1section$start$" + SymSection, "\1section$end$" + SymSection};
|
||||
}
|
||||
return {("__start_" + SectionName).str(), ("__stop_" + SectionName).str()};
|
||||
}
|
||||
|
||||
GlobalVariable *offloading::emitOffloadingEntry(
|
||||
Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name,
|
||||
uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr) {
|
||||
const llvm::Triple &Triple = M.getTargetTriple();
|
||||
StringRef SectionName = getOffloadEntrySection(M);
|
||||
|
||||
auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
|
||||
M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
|
||||
@@ -112,8 +129,9 @@ offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
|
||||
}
|
||||
|
||||
std::pair<GlobalVariable *, GlobalVariable *>
|
||||
offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
|
||||
offloading::getOffloadEntryArray(Module &M) {
|
||||
const llvm::Triple &Triple = M.getTargetTriple();
|
||||
StringRef SectionName = getOffloadEntrySection(M);
|
||||
|
||||
auto *ZeroInitilaizer =
|
||||
ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
|
||||
@@ -122,13 +140,14 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
|
||||
auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
|
||||
: GlobalValue::ExternalLinkage;
|
||||
|
||||
auto *EntriesB =
|
||||
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
|
||||
"__start_" + SectionName);
|
||||
auto [StartName, StopName] =
|
||||
getOffloadEntryBoundarySymbols(Triple, SectionName);
|
||||
|
||||
auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true,
|
||||
Linkage, EntryInit, StartName);
|
||||
EntriesB->setVisibility(GlobalValue::HiddenVisibility);
|
||||
auto *EntriesE =
|
||||
new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
|
||||
"__stop_" + SectionName);
|
||||
auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true,
|
||||
Linkage, EntryInit, StopName);
|
||||
EntriesE->setVisibility(GlobalValue::HiddenVisibility);
|
||||
|
||||
if (Triple.isOSBinFormatELF()) {
|
||||
@@ -142,6 +161,15 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
|
||||
DummyEntry->setSection(SectionName);
|
||||
DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
|
||||
appendToCompilerUsed(M, DummyEntry);
|
||||
} else if (Triple.isOSBinFormatMachO()) {
|
||||
// Mach-O needs a dummy variable in the section (like ELF) to ensure the
|
||||
// linker provides the section boundary symbols.
|
||||
auto *DummyEntry = new GlobalVariable(
|
||||
M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
|
||||
ZeroInitilaizer, "__dummy." + SectionName);
|
||||
DummyEntry->setSection(SectionName);
|
||||
DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment()));
|
||||
appendToCompilerUsed(M, DummyEntry);
|
||||
} else {
|
||||
// The COFF linker will merge sections containing a '$' together into a
|
||||
// single section. The order of entries in this section will be sorted
|
||||
|
||||
@@ -51,6 +51,34 @@
|
||||
; HIP-NEXT: ret void
|
||||
; HIP-NEXT: }
|
||||
|
||||
; RUN: llvm-offload-wrapper --triple=x86_64-apple-macosx10.15.0 -kind=hip %s -o %t.bc
|
||||
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP-MACHO
|
||||
|
||||
; HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
|
||||
; HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry]
|
||||
; HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries", align 8
|
||||
; HIP-MACHO-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @"__dummy.__LLVM,offload_entries"], section "llvm.metadata"
|
||||
; HIP-MACHO-NEXT: @.fatbin_image = internal constant {{.*}}, section "__HIP,__hip_fatbin"
|
||||
; HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8
|
||||
; HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null
|
||||
; HIP-MACHO-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
|
||||
|
||||
; HIP-MACHO: define internal void @.hip.fatbin_reg() section "__TEXT,__StaticInit" {
|
||||
; HIP-MACHO-NEXT: entry:
|
||||
; HIP-MACHO-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
|
||||
; HIP-MACHO-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
|
||||
; HIP-MACHO-NEXT: call void @.hip.globals_reg(ptr %0)
|
||||
; HIP-MACHO-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
|
||||
; HIP-MACHO-NEXT: ret void
|
||||
; HIP-MACHO-NEXT: }
|
||||
|
||||
; HIP-MACHO: define internal void @.hip.fatbin_unreg() section "__TEXT,__StaticInit" {
|
||||
; HIP-MACHO-NEXT: entry:
|
||||
; HIP-MACHO-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
|
||||
; HIP-MACHO-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
|
||||
; HIP-MACHO-NEXT: ret void
|
||||
; HIP-MACHO-NEXT: }
|
||||
|
||||
; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc
|
||||
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA
|
||||
|
||||
|
||||
Reference in New Issue
Block a user