diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index e04da90b3cbf..f08040d1d3d1 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -817,10 +817,14 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { llvm::Constant *FatBinStr; unsigned FatMagic; if (IsHIP) { - FatbinConstantName = ".hip_fatbin"; - FatbinSectionName = ".hipFatBinSegment"; + // On macOS (Mach-O), section names must be in "segment,section" format. + FatbinConstantName = + CGM.getTriple().isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin"; + FatbinSectionName = + CGM.getTriple().isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment"; - ModuleIDSectionName = "__hip_module_id"; + ModuleIDSectionName = + CGM.getTriple().isMacOSX() ? "__HIP,__module_id" : "__hip_module_id"; ModuleIDPrefix = "__hip_"; if (CudaGpuBinary) { diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 8b000845fea1..61b779c60b90 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1140,6 +1140,50 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const { Darwin::~Darwin() {} +void Darwin::ensureTargetInitialized() const { + if (TargetInitialized) + return; + + llvm::Triple::OSType OS = getTriple().getOS(); + + DarwinPlatformKind Platform; + switch (OS) { + case llvm::Triple::Darwin: + case llvm::Triple::MacOSX: + Platform = MacOS; + break; + case llvm::Triple::IOS: + Platform = IPhoneOS; + break; + case llvm::Triple::TvOS: + Platform = TvOS; + break; + case llvm::Triple::WatchOS: + Platform = WatchOS; + break; + case llvm::Triple::XROS: + Platform = XROS; + break; + case llvm::Triple::DriverKit: + Platform = DriverKit; + break; + default: + // Unknown platform; leave uninitialized. + return; + } + + DarwinEnvironmentKind Environment = NativeEnvironment; + if (getTriple().isSimulatorEnvironment()) + Environment = Simulator; + else if (getTriple().isMacCatalystEnvironment()) + Environment = MacCatalyst; + + VersionTuple OsVer = getTriple().getOSVersion(); + setTarget(Platform, Environment, OsVer.getMajor(), + OsVer.getMinor().value_or(0), OsVer.getSubminor().value_or(0), + VersionTuple()); +} + AppleMachO::~AppleMachO() {} MachO::~MachO() {} @@ -1182,7 +1226,11 @@ std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args, llvm::Triple Triple(ComputeLLVMTriple(Args, InputType)); // If the target isn't initialized (e.g., an unknown Darwin platform, return - // the default triple). + // the default triple). Note: we intentionally do NOT call + // ensureTargetInitialized() here because this method is called before + // AddDeploymentTarget() in some code paths (e.g. -print-libgcc-file-name), + // and lazy init with version 0.0.0 would conflict with the real version + // that AddDeploymentTarget() later sets via setTarget(). if (!isTargetInitialized()) return Triple.getTriple(); @@ -1248,6 +1296,11 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const { CC1Args.push_back("-Werror=undef-prefix"); // For modern targets, promote certain warnings to errors. + // Lazily initialize the target if needed (e.g. when Darwin is used as + // a host toolchain for device offloading). + ensureTargetInitialized(); + if (!isTargetInitialized()) + return; if (isTargetWatchOSBased() || getTriple().isArch64Bit()) { // Always enable -Wdeprecated-objc-isa-usage and promote it // to an error. @@ -3399,6 +3452,12 @@ void Darwin::addClangTargetOptions( MachO::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind); + // When compiling device code (e.g. SPIR-V for HIP), skip host-specific + // flags like -faligned-alloc-unavailable and -fno-sized-deallocation + // that depend on the host OS version and are irrelevant to device code. + if (DeviceOffloadKind != Action::OFK_None) + return; + // Pass "-faligned-alloc-unavailable" only when the user hasn't manually // enabled or disabled aligned allocations. if (!DriverArgs.hasArgNoClaim(options::OPT_faligned_allocation, @@ -3939,6 +3998,9 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args, } void Darwin::CheckObjCARC() const { + ensureTargetInitialized(); + if (!isTargetInitialized()) + return; if (isTargetIOSBased() || isTargetWatchOSBased() || isTargetXROS() || (isTargetMacOSBased() && !isMacosxVersionLT(10, 6))) return; @@ -3958,6 +4020,9 @@ SanitizerMask Darwin::getSupportedSanitizers() const { Res |= SanitizerKind::FuzzerNoLink; Res |= SanitizerKind::ObjCCast; + ensureTargetInitialized(); + if (!isTargetInitialized()) + return Res; // Prior to 10.9, macOS shipped a version of the C++ standard library without // C++11 support. The same is true of iOS prior to version 5. These OS'es are // incompatible with -fsanitize=vptr. diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h index 75f1dff46bfa..89177b0455ac 100644 --- a/clang/lib/Driver/ToolChains/Darwin.h +++ b/clang/lib/Driver/ToolChains/Darwin.h @@ -391,6 +391,12 @@ private: void VerifyTripleForSDK(const llvm::opt::ArgList &Args, const llvm::Triple Triple) const; +protected: + /// Lazily initialize the target platform from the triple when + /// AddDeploymentTarget has not run yet (e.g. when Darwin is used as + /// a host toolchain for device offloading). + void ensureTargetInitialized() const; + public: Darwin(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index 1fcb36cc3a39..3bf0f23409f9 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -409,9 +409,11 @@ void HIP::constructGenerateObjFileFromHIPFatBinary( ObjStream << "# *** Automatically generated by Clang ***\n"; if (FoundPrimaryGpuBinHandleSymbol) { // Define the first gpubin handle symbol - if (HostTriple.isWindowsMSVCEnvironment()) + if (HostTriple.isWindowsMSVCEnvironment()) { ObjStream << " .section .hip_gpubin_handle,\"dw\"\n"; - else { + } else if (HostTriple.isMacOSX()) { + ObjStream << " .section __HIP,__gpubin_handle\n"; + } else { ObjStream << " .protected " << PrimaryGpuBinHandleSymbol << "\n"; ObjStream << " .type " << PrimaryGpuBinHandleSymbol << ",@object\n"; ObjStream << " .section .hip_gpubin_handle,\"aw\"\n"; @@ -430,9 +432,12 @@ void HIP::constructGenerateObjFileFromHIPFatBinary( } if (FoundPrimaryHipFatbinSymbol) { // Define the first fatbin symbol - if (HostTriple.isWindowsMSVCEnvironment()) + if (HostTriple.isWindowsMSVCEnvironment()) { ObjStream << " .section .hip_fatbin,\"dw\"\n"; - else { + } else if (HostTriple.isMacOSX()) { + // Mach-O requires "segment,section" format + ObjStream << " .section __HIP,__hip_fatbin\n"; + } else { ObjStream << " .protected " << PrimaryHipFatbinSymbol << "\n"; ObjStream << " .type " << PrimaryHipFatbinSymbol << ",@object\n"; ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; diff --git a/clang/test/CodeGenCUDA/device-stub-macho.cu b/clang/test/CodeGenCUDA/device-stub-macho.cu new file mode 100644 index 000000000000..d53cefd58bfc --- /dev/null +++ b/clang/test/CodeGenCUDA/device-stub-macho.cu @@ -0,0 +1,28 @@ +// Verify that HIP fat binary sections use Mach-O "segment,section" format on Darwin. + +// RUN: echo -n "GPU binary would be here." > %t +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \ +// RUN: -fcuda-include-gpubinary %t -o - -x hip \ +// RUN: | FileCheck %s --check-prefix=HIPEF +// RUN: %clang_cc1 -cuid=123 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s \ +// RUN: -o - -x hip \ +// RUN: | FileCheck %s --check-prefix=HIPNEF + +#include "Inputs/cuda.h" + +__device__ int device_var; +__constant__ int constant_var; + +// When fat binary is embedded, section names use Mach-O format. +// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}section "__HIP,__hip_fatbin"{{.*}}align 4096 +// HIPEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr } +// HIPEF-SAME: section "__HIP,__fatbin" + +// When fat binary is external (no -fcuda-include-gpubinary), external symbol uses Mach-O section. +// HIPNEF: @[[FATBIN:__hip_fatbin_[0-9a-f]+]] = external constant i8, section "__HIP,__hip_fatbin" +// HIPNEF: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr } +// HIPNEF-SAME: section "__HIP,__fatbin" + +__global__ void kernelfunc(int i, int j, int k) {} + +void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu index fe03cc83b9d2..86ef3bcfa556 100644 --- a/clang/test/CodeGenCUDA/offloading-entries.cu +++ b/clang/test/CodeGenCUDA/offloading-entries.cu @@ -11,6 +11,9 @@ // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \ // RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ // RUN: --check-prefix=HIP-COFF %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-macosx10.15.0 -fgpu-rdc \ +// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: --check-prefix=HIP-MACHO %s #include "Inputs/cuda.h" @@ -75,6 +78,21 @@ // HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading" // HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE" //. +// HIP-MACHO: @managed.managed = global i32 0, align 4 +// HIP-MACHO: @managed = externally_initialized global ptr null +// HIP-MACHO: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading" +// HIP-MACHO: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries" +// HIP-MACHO: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading" +// HIP-MACHO: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "__LLVM,offload_entries" +// HIP-MACHO: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading" +// HIP-MACHO: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "__LLVM,offload_entries" +// HIP-MACHO: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading" +// HIP-MACHO: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "__LLVM,offload_entries" +// HIP-MACHO: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading" +// HIP-MACHO: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "__LLVM,offload_entries" +// HIP-MACHO: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading" +// HIP-MACHO: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "__LLVM,offload_entries" +//. // CUDA-LABEL: @_Z18__device_stub__foov( // CUDA-NEXT: entry: // CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) @@ -103,6 +121,13 @@ // HIP-COFF: setup.end: // HIP-COFF-NEXT: ret void // +// HIP-MACHO-LABEL: @_Z18__device_stub__foov( +// HIP-MACHO-NEXT: entry: +// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov) +// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]] +// HIP-MACHO: setup.end: +// HIP-MACHO-NEXT: ret void +// __global__ void foo() {} __device__ int var = 1; const __device__ int constant = 1; @@ -137,6 +162,13 @@ __device__ __managed__ int managed = 0; // HIP-COFF: setup.end: // HIP-COFF-NEXT: ret void // +// HIP-MACHO-LABEL: @_Z21__device_stub__kernelv( +// HIP-MACHO-NEXT: entry: +// HIP-MACHO-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv) +// HIP-MACHO-NEXT: br label [[SETUP_END:%.*]] +// HIP-MACHO: setup.end: +// HIP-MACHO-NEXT: ret void +// __global__ void kernel() { external = 1; } surface surf; diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index c434e4a68326..dd346bce225e 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -162,6 +162,8 @@ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF +// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-apple-macosx10.15.0 \ +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-MACHO // HIP-ELF: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] // HIP-ELF-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] @@ -170,13 +172,25 @@ // HIP-COFF: @__start_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OA" // HIP-COFF-NEXT: @__stop_llvm_offload_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries$OZ" -// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" -// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 -// HIP-NEXT: @.hip.binary_handle = internal global ptr null +// HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry] +// HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry] +// HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries" + +// HIP-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" +// HIP-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 +// HIP-ELF-NEXT: @.hip.binary_handle = internal global ptr null + +// HIP-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" +// HIP-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 +// HIP-COFF-NEXT: @.hip.binary_handle = internal global ptr null + +// HIP-MACHO: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section "__HIP,__hip_fatbin" +// HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8 +// HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null // HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] -// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { +// HIP: define internal void @.hip.fatbin_reg() section "{{\.text\.startup|__TEXT,__StaticInit}}" { // HIP-NEXT: entry: // HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) // HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 @@ -185,20 +199,20 @@ // HIP-NEXT: ret void // HIP-NEXT: } // -// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" { +// HIP: define internal void @.hip.fatbin_unreg() section "{{\.text\.startup|__TEXT,__StaticInit}}" { // HIP-NEXT: entry: // HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 // HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0) // HIP-NEXT: ret void // HIP-NEXT: } // -// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" { +// HIP: define internal void @.hip.globals_reg(ptr %0) section "{{\.text\.startup|__TEXT,__StaticInit}}" { // HIP-NEXT: entry: -// HIP-NEXT: %1 = icmp ne ptr @__start_llvm_offload_entries, @__stop_llvm_offload_entries +// HIP-NEXT: %1 = icmp ne ptr @{{.*offload_entries.*}}, @{{.*offload_entries.*}} // HIP-NEXT: br i1 %1, label %while.entry, label %while.end // // HIP: while.entry: -// HIP-NEXT: %entry1 = phi ptr [ @__start_llvm_offload_entries, %entry ], [ %16, %if.end ] +// HIP-NEXT: %entry1 = phi ptr [ @{{.*offload_entries.*}}, %entry ], [ %16, %if.end ] // HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4 // HIP-NEXT: %addr = load ptr, ptr %2, align 8 // HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8 @@ -258,7 +272,7 @@ // // HIP: if.end: // HIP-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 -// HIP-NEXT: %17 = icmp eq ptr %16, @__stop_llvm_offload_entries +// HIP-NEXT: %17 = icmp eq ptr %16, @{{.*offload_entries.*}} // HIP-NEXT: br i1 %17, label %while.end, label %while.entry // // HIP: while.end: diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h index dad447228d14..a1a8a0291626 100644 --- a/llvm/include/llvm/Frontend/Offloading/Utility.h +++ b/llvm/include/llvm/Frontend/Offloading/Utility.h @@ -84,12 +84,16 @@ LLVM_ABI StructType *getEntryTy(Module &M); /// \param Data Extra data storage associated with the entry. /// \param SectionName The section this entry will be placed at. /// \param AuxAddr An extra pointer if needed. +/// Returns the section name for offloading entries based on the target triple. +/// ELF: "llvm_offload_entries", COFF: "llvm_offload_entries", +/// Mach-O: "__LLVM,offload_entries". +LLVM_ABI StringRef getOffloadEntrySection(Module &M); + /// \return The emitted global variable containing the offloading entry. LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, - uint64_t Data, Constant *AuxAddr = nullptr, - StringRef SectionName = "llvm_offload_entries"); + uint64_t Data, Constant *AuxAddr = nullptr); /// Create a constant struct initializer used to register this global at /// runtime. @@ -102,7 +106,7 @@ getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind, /// Creates a pair of globals used to iterate the array of offloading entries by /// accessing the section variables provided by the linker. LLVM_ABI std::pair -getOffloadEntryArray(Module &M, StringRef SectionName = "llvm_offload_entries"); +getOffloadEntryArray(Module &M); namespace amdgpu { /// Check if an image is compatible with current system's environment. The diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp index ff5946bff35d..b6b25c7bbad1 100644 --- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp @@ -44,6 +44,12 @@ IntegerType *getSizeTTy(Module &M) { return M.getDataLayout().getIntPtrType(M.getContext()); } +/// Returns the appropriate startup section for registration functions. +/// Mach-O uses "__TEXT,__StaticInit"; ELF/COFF use ".text.startup". +StringRef getStartupSection(const Triple &T) { + return T.isOSBinFormatMachO() ? "__TEXT,__StaticInit" : ".text.startup"; +} + // struct __tgt_device_image { // void *ImageStart; // void *ImageEnd; @@ -207,7 +213,7 @@ Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc, auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, ".omp_offloading.descriptor_unreg" + Suffix, &M); - Func->setSection(".text.startup"); + Func->setSection(getStartupSection(M.getTargetTriple())); // Get __tgt_unregister_lib function declaration. auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), @@ -229,7 +235,7 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc, auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, ".omp_offloading.descriptor_reg" + Suffix, &M); - Func->setSection(".text.startup"); + Func->setSection(getStartupSection(M.getTargetTriple())); // Get __tgt_register_lib function declaration. auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M), @@ -285,7 +291,7 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef Image, bool IsHIP, // Create the global string containing the fatbinary. StringRef FatbinConstantSection = - IsHIP ? ".hip_fatbin" + IsHIP ? (Triple.isMacOSX() ? "__HIP,__hip_fatbin" : ".hip_fatbin") : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin"); auto *Data = ConstantDataArray::get(C, Image); auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true, @@ -294,9 +300,9 @@ GlobalVariable *createFatbinDesc(Module &M, ArrayRef Image, bool IsHIP, Fatbin->setSection(FatbinConstantSection); // Create the fatbinary wrapper - StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment" - : Triple.isMacOSX() ? "__NV_CUDA,__fatbin" - : ".nvFatBinSegment"; + StringRef FatbinWrapperSection = + IsHIP ? (Triple.isMacOSX() ? "__HIP,__fatbin" : ".hipFatBinSegment") + : (Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment"); Constant *FatbinWrapper[] = { ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic), ConstantInt::get(Type::getInt32Ty(C), 1), @@ -403,7 +409,7 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP, auto *RegGlobalsFn = Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage, IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M); - RegGlobalsFn->setSection(".text.startup"); + RegGlobalsFn->setSection(getStartupSection(M.getTargetTriple())); // Create the loop to register all the entries. IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn)); @@ -559,13 +565,13 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc, auto *CtorFunc = Function::Create( CtorFuncTy, GlobalValue::InternalLinkage, (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M); - CtorFunc->setSection(".text.startup"); + CtorFunc->setSection(getStartupSection(M.getTargetTriple())); auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); auto *DtorFunc = Function::Create( DtorFuncTy, GlobalValue::InternalLinkage, (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M); - DtorFunc->setSection(".text.startup"); + DtorFunc->setSection(getStartupSection(M.getTargetTriple())); auto *PtrTy = PointerType::getUnqual(C); @@ -655,7 +661,7 @@ public: FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, Twine("sycl") + ".descriptor_reg", &M); - Func->setSection(".text.startup"); + Func->setSection(getStartupSection(M.getTargetTriple())); PointerType *PtrTy = PointerType::getUnqual(C); IntegerType *Int64Ty = Type::getInt64Ty(C); @@ -677,7 +683,7 @@ public: FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, "sycl.descriptor_unreg", &M); - Func->setSection(".text.startup"); + Func->setSection(getStartupSection(M.getTargetTriple())); PointerType *PtrTy = PointerType::getUnqual(C); IntegerType *Int64Ty = Type::getInt64Ty(C); diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp index e78ef5a985db..d689d1bb192d 100644 --- a/llvm/lib/Frontend/Offloading/Utility.cpp +++ b/llvm/lib/Frontend/Offloading/Utility.cpp @@ -84,12 +84,29 @@ offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind, return {EntryInitializer, Str}; } -GlobalVariable * -offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind, - Constant *Addr, StringRef Name, uint64_t Size, - uint32_t Flags, uint64_t Data, - Constant *AuxAddr, StringRef SectionName) { +StringRef offloading::getOffloadEntrySection(Module &M) { + return M.getTargetTriple().isOSBinFormatMachO() ? "__LLVM,offload_entries" + : "llvm_offload_entries"; +} + +/// Returns the start/end symbol names for iterating offloading entries in a +/// given section. Mach-O uses \1section$start$/\1section$end$ convention; +/// ELF/COFF use __start_/__stop_ prefixes. +static std::pair +getOffloadEntryBoundarySymbols(const Triple &T, StringRef SectionName) { + if (T.isOSBinFormatMachO()) { + std::string SymSection = SectionName.str(); + std::replace(SymSection.begin(), SymSection.end(), ',', '$'); + return {"\1section$start$" + SymSection, "\1section$end$" + SymSection}; + } + return {("__start_" + SectionName).str(), ("__stop_" + SectionName).str()}; +} + +GlobalVariable *offloading::emitOffloadingEntry( + Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, + uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr) { const llvm::Triple &Triple = M.getTargetTriple(); + StringRef SectionName = getOffloadEntrySection(M); auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer( M, Kind, Addr, Name, Size, Flags, Data, AuxAddr); @@ -112,8 +129,9 @@ offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind, } std::pair -offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { +offloading::getOffloadEntryArray(Module &M) { const llvm::Triple &Triple = M.getTargetTriple(); + StringRef SectionName = getOffloadEntrySection(M); auto *ZeroInitilaizer = ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); @@ -122,13 +140,14 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage : GlobalValue::ExternalLinkage; - auto *EntriesB = - new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, - "__start_" + SectionName); + auto [StartName, StopName] = + getOffloadEntryBoundarySymbols(Triple, SectionName); + + auto *EntriesB = new GlobalVariable(M, EntryType, /*isConstant=*/true, + Linkage, EntryInit, StartName); EntriesB->setVisibility(GlobalValue::HiddenVisibility); - auto *EntriesE = - new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, - "__stop_" + SectionName); + auto *EntriesE = new GlobalVariable(M, EntryType, /*isConstant=*/true, + Linkage, EntryInit, StopName); EntriesE->setVisibility(GlobalValue::HiddenVisibility); if (Triple.isOSBinFormatELF()) { @@ -142,6 +161,15 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { DummyEntry->setSection(SectionName); DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment())); appendToCompilerUsed(M, DummyEntry); + } else if (Triple.isOSBinFormatMachO()) { + // Mach-O needs a dummy variable in the section (like ELF) to ensure the + // linker provides the section boundary symbols. + auto *DummyEntry = new GlobalVariable( + M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage, + ZeroInitilaizer, "__dummy." + SectionName); + DummyEntry->setSection(SectionName); + DummyEntry->setAlignment(Align(object::OffloadBinary::getAlignment())); + appendToCompilerUsed(M, DummyEntry); } else { // The COFF linker will merge sections containing a '$' together into a // single section. The order of entries in this section will be sorted diff --git a/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll index 32aad0b6cf64..b9d2d86eda37 100644 --- a/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll +++ b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll @@ -51,6 +51,34 @@ ; HIP-NEXT: ret void ; HIP-NEXT: } +; RUN: llvm-offload-wrapper --triple=x86_64-apple-macosx10.15.0 -kind=hip %s -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP-MACHO + +; HIP-MACHO: @"\01section$start$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry] +; HIP-MACHO-NEXT: @"\01section$end$__LLVM$offload_entries" = external hidden constant [0 x %struct.__tgt_offload_entry] +; HIP-MACHO-NEXT: @"__dummy.__LLVM,offload_entries" = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "__LLVM,offload_entries", align 8 +; HIP-MACHO-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @"__dummy.__LLVM,offload_entries"], section "llvm.metadata" +; HIP-MACHO-NEXT: @.fatbin_image = internal constant {{.*}}, section "__HIP,__hip_fatbin" +; HIP-MACHO-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section "__HIP,__fatbin", align 8 +; HIP-MACHO-NEXT: @.hip.binary_handle = internal global ptr null +; HIP-MACHO-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] + +; HIP-MACHO: define internal void @.hip.fatbin_reg() section "__TEXT,__StaticInit" { +; HIP-MACHO-NEXT: entry: +; HIP-MACHO-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) +; HIP-MACHO-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 +; HIP-MACHO-NEXT: call void @.hip.globals_reg(ptr %0) +; HIP-MACHO-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg) +; HIP-MACHO-NEXT: ret void +; HIP-MACHO-NEXT: } + +; HIP-MACHO: define internal void @.hip.fatbin_unreg() section "__TEXT,__StaticInit" { +; HIP-MACHO-NEXT: entry: +; HIP-MACHO-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 +; HIP-MACHO-NEXT: call void @__hipUnregisterFatBinary(ptr %0) +; HIP-MACHO-NEXT: ret void +; HIP-MACHO-NEXT: } + ; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc ; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA