[Offload][libsycl][clang-sycl-linker] Simplify SYCL Offload wrapping (#193876)
Replace the __sycl_tgt_bin_desc/__sycl_tgt_device_image-based fat binary registration with a simpler OffloadBinary-native approach: - __sycl_register_lib/__sycl_unregister_lib now take (BinaryStart, Size) instead of a __sycl_tgt_bin_desc pointer; __sycl_unregister_lib only needs BinaryStart since the runtime looks up the binary by its start address. - OffloadWrapper's SYCL wrapping is significantly simplified: the __tgt_bin_desc/__tgt_device_image structs and the descriptor construction code are replaced by a single embedded OffloadBinary blob passed directly to the register/unregister entry points. - clang-sycl-linker generates a single OffloadBinary, which contains multiple images. - ProgramAndKernelManager::registerFatBin parses the blob via OffloadBinary::create, keying MDeviceImageManagers by BinaryStart to eliminate the reparse on unregister. - DeviceImageManager owns std::unique_ptr<OffloadBinary> instead of borrowing a __sycl_tgt_device_image pointer; getRawData renamed to getOffloadBinary. - Introduce forEachSymbol and SymbolTable serialization (writeSymbolTable/ SymbolTableHeader/SymbolTableEntry) in Utility.h to expose the kernel symbol table embedded in the OffloadBinary "symbols" section for consumption by the SYCL runtime. - device_binary_structures.hpp reduced to DeviceBinaryTripleSPIRV64. Co-Authored-By: Claude
This commit is contained in:
@@ -273,27 +273,16 @@
|
||||
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \
|
||||
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=SYCL
|
||||
|
||||
// SYCL: %__sycl.tgt_device_image = type { i16, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
|
||||
// SYCL-NEXT: %__sycl.tgt_bin_desc = type { i16, i16, ptr, ptr, ptr }
|
||||
|
||||
// SYCL: @.sycl_offloading.target.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
|
||||
// SYCL-NEXT: @.sycl_offloading.opts.compile.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
|
||||
// SYCL-NEXT: @.sycl_offloading.opts.link.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
|
||||
// SYCL-NEXT: @.sycl_offloading.0.data = internal unnamed_addr constant [0 x i8] zeroinitializer, section ".llvm.offloading"
|
||||
// SYCL-NEXT: @.offloading.entry_name = internal unnamed_addr constant [5 x i8] c"stub\00", section ".llvm.rodata.offloading", align 1
|
||||
// SYCL-NEXT: @.offloading.entry.stub = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
|
||||
// SYCL-NEXT: @.sycl_offloading.entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }]
|
||||
// SYCL-NEXT: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 3, i8 8, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr @.sycl_offloading.0.data, ptr @.sycl_offloading.0.data, ptr @.sycl_offloading.entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @.sycl_offloading.entries_arr, i64 0, i64 1), ptr null, ptr null }]
|
||||
// SYCL-NEXT: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null }
|
||||
// SYCL: @.sycl_offloading.binary = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"{{.*}}", section ".llvm.offloading"
|
||||
|
||||
// SYCL: define internal void @sycl.descriptor_reg() section ".text.startup" {
|
||||
// SYCL-NEXT: entry:
|
||||
// SYCL-NEXT: call void @__sycl_register_lib(ptr @.sycl_offloading.descriptor)
|
||||
// SYCL-NEXT: call void @__sycl_register_lib(ptr @.sycl_offloading.binary, i64 [[SIZE]])
|
||||
// SYCL-NEXT: ret void
|
||||
// SYCL-NEXT: }
|
||||
|
||||
// SYCL: define internal void @sycl.descriptor_unreg() section ".text.startup" {
|
||||
// SYCL-NEXT: entry:
|
||||
// SYCL-NEXT: call void @__sycl_unregister_lib(ptr @.sycl_offloading.descriptor)
|
||||
// SYCL-NEXT: call void @__sycl_unregister_lib(ptr @.sycl_offloading.binary, i64 [[SIZE]])
|
||||
// SYCL-NEXT: ret void
|
||||
// SYCL-NEXT: }
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "llvm/BinaryFormat/Magic.h"
|
||||
#include "llvm/Bitcode/BitcodeWriter.h"
|
||||
#include "llvm/CodeGen/CommandFlags.h"
|
||||
#include "llvm/Frontend/Offloading/Utility.h"
|
||||
#include "llvm/IR/DiagnosticPrinter.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IRReader/IRReader.h"
|
||||
@@ -35,6 +36,7 @@
|
||||
#include "llvm/Option/OptTable.h"
|
||||
#include "llvm/Option/Option.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/FileOutputBuffer.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "llvm/Support/InitLLVM.h"
|
||||
@@ -487,14 +489,14 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
|
||||
if (!ModOrErr)
|
||||
return ModOrErr.takeError();
|
||||
|
||||
SmallString<0> SymbolData;
|
||||
SmallVector<StringRef> KernelNames;
|
||||
for (Function &F : **ModOrErr) {
|
||||
// TODO: Consider using LLVM-IR metadata to identify globals of interest
|
||||
if (F.hasKernelCallingConv()) {
|
||||
SymbolData.append(F.getName());
|
||||
SymbolData.push_back('\0');
|
||||
}
|
||||
if (F.hasKernelCallingConv())
|
||||
KernelNames.push_back(F.getName());
|
||||
}
|
||||
SmallString<0> SymbolData;
|
||||
llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData);
|
||||
SymbolTable.emplace_back(std::move(SymbolData));
|
||||
}
|
||||
|
||||
@@ -520,13 +522,11 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
|
||||
}
|
||||
}
|
||||
|
||||
// Write the final output into file.
|
||||
int FD = -1;
|
||||
if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
|
||||
return errorCodeToError(EC);
|
||||
llvm::raw_fd_ostream FS(FD, /*shouldClose=*/true);
|
||||
|
||||
// Collect all images to be packed into a single OffloadBinary.
|
||||
SmallVector<OffloadingImage> Images;
|
||||
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
|
||||
if (SymbolTable[I].empty())
|
||||
continue;
|
||||
auto File = SplitModules[I];
|
||||
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
|
||||
llvm::MemoryBuffer::getFileOrSTDIN(File);
|
||||
@@ -545,13 +545,18 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
|
||||
Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ));
|
||||
TheImage.StringData["symbols"] = SymbolTable[I];
|
||||
TheImage.Image = std::move(*FileOrErr);
|
||||
Images.emplace_back(std::move(TheImage));
|
||||
}
|
||||
|
||||
llvm::SmallString<0> Buffer = OffloadBinary::write(TheImage);
|
||||
llvm::SmallString<0> Buffer = OffloadBinary::write(Images);
|
||||
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
|
||||
return createStringError("Offload binary has invalid size alignment");
|
||||
FS << Buffer;
|
||||
}
|
||||
return Error::success();
|
||||
|
||||
auto OutputOrErr = FileOutputBuffer::create(OutputFile, Buffer.size());
|
||||
if (!OutputOrErr)
|
||||
return OutputOrErr.takeError();
|
||||
llvm::copy(Buffer, (*OutputOrErr)->getBufferStart());
|
||||
return (*OutputOrErr)->commit();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -17,11 +17,6 @@
|
||||
|
||||
#include <sycl/__impl/detail/config.hpp>
|
||||
|
||||
#include <llvm/Frontend/Offloading/Utility.h>
|
||||
#include <llvm/Object/OffloadBinary.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
_LIBSYCL_BEGIN_NAMESPACE_SYCL
|
||||
|
||||
namespace detail {
|
||||
@@ -35,60 +30,6 @@ namespace detail {
|
||||
/// SPIR-V with 64-bit pointers.
|
||||
static constexpr char DeviceBinaryTripleSPIRV64[] = "spirv64-unknown-unknown";
|
||||
|
||||
/// Device binary descriptor version supported by this library.
|
||||
static constexpr uint16_t SupportedDeviceBinaryVersion = 3;
|
||||
|
||||
/// This struct is a record of the device binary information.
|
||||
/// It must match the __tgt_device_image structure generated by the
|
||||
/// compiler when their `Version` fields match.
|
||||
struct __sycl_tgt_device_image {
|
||||
uint16_t Version;
|
||||
/// The type of offload model the binary employs. See `OffloadKind`. Only
|
||||
/// OFK_SYCL is supported by libsycl.
|
||||
uint8_t OffloadKind;
|
||||
/// Format of the binary data, see `ImageKind`.
|
||||
uint8_t ImageFormat;
|
||||
/// A null-terminated string representation of the device's target
|
||||
/// architecture. Must hold one of _LIBSYCL_DEVICE_BINARY_TARGET_* values.
|
||||
const char *TripleString;
|
||||
/// A null-terminated string of target- and compiler-specific options
|
||||
/// that are suggested to use to "compile" program at runtime.
|
||||
const char *CompileOptions;
|
||||
/// A null-terminated string of target- and compiler-specific options
|
||||
/// that are suggested to use to "link" program at runtime.
|
||||
const char *LinkOptions;
|
||||
/// Pointer to the target code start.
|
||||
const unsigned char *ImageStart;
|
||||
/// Pointer to the target code end.
|
||||
const unsigned char *ImageEnd;
|
||||
/// The offload entry table
|
||||
llvm::offloading::EntryTy *EntriesBegin;
|
||||
llvm::offloading::EntryTy *EntriesEnd;
|
||||
// TODO: properties are not supported now.
|
||||
/// Array of property sets.
|
||||
void *PropertiesBegin;
|
||||
void *PropertiesEnd;
|
||||
};
|
||||
|
||||
/// Version of offload binaries descriptor `__sycl_tgt_bin_desc` supported by
|
||||
/// libsycl.
|
||||
static constexpr uint16_t SupportedOffloadBinaryVersion = 1;
|
||||
|
||||
/// This struct is a record of all the device code that may be offloaded.
|
||||
/// It must match the `__tgt_bin_desc` structure generated by
|
||||
/// the compiler when their `Version` fields match.
|
||||
struct __sycl_tgt_bin_desc {
|
||||
/// Version of the structure.
|
||||
uint16_t Version;
|
||||
/// Number of device binaries in this descriptor.
|
||||
uint16_t NumDeviceBinaries;
|
||||
/// Device binaries data.
|
||||
__sycl_tgt_device_image *DeviceImages;
|
||||
/// The offload entry table (not used, for compatibility with OpenMP).
|
||||
llvm::offloading::EntryTy *HostEntriesBegin;
|
||||
llvm::offloading::EntryTy *HostEntriesEnd;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
_LIBSYCL_END_NAMESPACE_SYCL
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@ ProgramWrapper::ProgramWrapper(ol_device_handle_t Device,
|
||||
DeviceImageManager &DevImage) {
|
||||
assert(Device);
|
||||
|
||||
callAndThrow(olCreateProgram, Device, DevImage.getRawData().ImageStart,
|
||||
DevImage.getSize(), &MProgram);
|
||||
llvm::StringRef Image = DevImage.getOffloadBinary().getImage();
|
||||
callAndThrow(olCreateProgram, Device, Image.data(), Image.size(), &MProgram);
|
||||
}
|
||||
|
||||
ProgramWrapper::~ProgramWrapper() {
|
||||
|
||||
@@ -17,10 +17,11 @@
|
||||
|
||||
#include <sycl/__impl/detail/config.hpp>
|
||||
|
||||
#include <detail/device_binary_structures.hpp>
|
||||
#include <llvm/Object/OffloadBinary.h>
|
||||
|
||||
#include <OffloadAPI.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
_LIBSYCL_BEGIN_NAMESPACE_SYCL
|
||||
@@ -35,8 +36,7 @@ public:
|
||||
/// provided arguments.
|
||||
///
|
||||
/// \param Device is the device to use for program creation.
|
||||
/// \param DevImage is the device image (wrapped __sycl_tgt_device_image) to
|
||||
/// use for program creation.
|
||||
/// \param DevImage is the device image to use for program creation.
|
||||
/// \throw sycl::exception with sycl::errc::runtime when failed to create the
|
||||
/// program.
|
||||
ProgramWrapper(ol_device_handle_t Device, DeviceImageManager &DevImage);
|
||||
@@ -61,7 +61,8 @@ private:
|
||||
/// creation.
|
||||
class DeviceImageManager {
|
||||
public:
|
||||
DeviceImageManager(const __sycl_tgt_device_image &Bin) : MBin(&Bin) {}
|
||||
DeviceImageManager(std::unique_ptr<llvm::object::OffloadBinary> Bin)
|
||||
: MBin(std::move(Bin)) {}
|
||||
// Explicitly delete copy constructor/operator= to avoid unintentional copies.
|
||||
DeviceImageManager(const DeviceImageManager &) = delete;
|
||||
DeviceImageManager &operator=(const DeviceImageManager &) = delete;
|
||||
@@ -71,14 +72,8 @@ public:
|
||||
|
||||
~DeviceImageManager() = default;
|
||||
|
||||
/// \return a reference to the corresponding raw __sycl_tgt_device_image
|
||||
/// object.
|
||||
const __sycl_tgt_device_image &getRawData() const { return *get(); }
|
||||
|
||||
/// \return the size of the corresponding device image data in bytes.
|
||||
size_t getSize() const {
|
||||
return static_cast<size_t>(MBin->ImageEnd - MBin->ImageStart);
|
||||
}
|
||||
/// \return a reference to the corresponding parsed OffloadBinary object.
|
||||
const llvm::object::OffloadBinary &getOffloadBinary() const { return *MBin; }
|
||||
|
||||
/// Returns a liboffload program which is compatible with the specified
|
||||
/// device. Searches among existing programs and creates a new one if no
|
||||
@@ -92,9 +87,7 @@ public:
|
||||
protected:
|
||||
std::unordered_map<ol_device_handle_t, ProgramWrapper> MPrograms;
|
||||
|
||||
const __sycl_tgt_device_image *get() const { return MBin; }
|
||||
|
||||
__sycl_tgt_device_image const *MBin{};
|
||||
std::unique_ptr<llvm::object::OffloadBinary> MBin;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -13,113 +13,96 @@
|
||||
#include <detail/device_impl.hpp>
|
||||
#include <detail/offload/offload_utils.hpp>
|
||||
|
||||
#include <cstring>
|
||||
#include <llvm/Frontend/Offloading/Utility.h>
|
||||
|
||||
_LIBSYCL_BEGIN_NAMESPACE_SYCL
|
||||
namespace detail {
|
||||
|
||||
static inline bool checkFatBinVersion(const __sycl_tgt_bin_desc &FatbinDesc) {
|
||||
return FatbinDesc.Version == SupportedOffloadBinaryVersion;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
checkDeviceImageValidity(const __sycl_tgt_device_image &DeviceImage) {
|
||||
return (DeviceImage.Version == SupportedDeviceBinaryVersion) &&
|
||||
(DeviceImage.OffloadKind == llvm::object::OFK_SYCL) &&
|
||||
(DeviceImage.ImageFormat == llvm::object::IMG_SPIRV);
|
||||
checkDeviceImageValidity(const llvm::object::OffloadBinary &OB) {
|
||||
return (OB.getOffloadKind() == llvm::object::OFK_SYCL) &&
|
||||
(OB.getImageKind() == llvm::object::IMG_SPIRV);
|
||||
}
|
||||
|
||||
void ProgramAndKernelManager::registerFatBin(__sycl_tgt_bin_desc *FatbinDesc) {
|
||||
assert(FatbinDesc && "Device images descriptor can't be nullptr");
|
||||
void ProgramAndKernelManager::registerFatBin(const void *BinaryStart,
|
||||
size_t Size) {
|
||||
assert(BinaryStart && "Binary pointer can't be nullptr");
|
||||
|
||||
if (!checkFatBinVersion(*FatbinDesc))
|
||||
llvm::MemoryBufferRef MBR(
|
||||
llvm::StringRef(static_cast<const char *>(BinaryStart), Size),
|
||||
/*Identifier=*/"");
|
||||
auto BinOrErr = llvm::object::OffloadBinary::create(MBR);
|
||||
if (!BinOrErr || BinOrErr->empty())
|
||||
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
|
||||
"Incompatible version of device images descriptor.");
|
||||
if (!FatbinDesc->NumDeviceBinaries)
|
||||
return;
|
||||
"Failed to parse OffloadBinary");
|
||||
|
||||
DeviceImageManagerVec Images;
|
||||
Images.reserve(BinOrErr->size());
|
||||
|
||||
std::lock_guard<std::mutex> Guard(MDataCollectionMutex);
|
||||
for (uint16_t I = 0; I < FatbinDesc->NumDeviceBinaries; ++I) {
|
||||
const auto &RawDeviceImage = FatbinDesc->DeviceImages[I];
|
||||
if (!checkDeviceImageValidity(RawDeviceImage))
|
||||
for (std::unique_ptr<llvm::object::OffloadBinary> &OB : *BinOrErr) {
|
||||
if (!checkDeviceImageValidity(*OB))
|
||||
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
|
||||
"Incompatible device image.");
|
||||
|
||||
const llvm::offloading::EntryTy *EntriesB = RawDeviceImage.EntriesBegin;
|
||||
const llvm::offloading::EntryTy *EntriesE = RawDeviceImage.EntriesEnd;
|
||||
// Ignore "empty" device image.
|
||||
if (EntriesB == EntriesE)
|
||||
continue;
|
||||
llvm::StringRef Symbols = OB->getString("symbols");
|
||||
|
||||
std::unique_ptr<DeviceImageManager> NewImageWrapper =
|
||||
std::make_unique<DeviceImageManager>(RawDeviceImage);
|
||||
|
||||
for (auto EntriesIt = EntriesB; EntriesIt != EntriesE; ++EntriesIt) {
|
||||
auto Name = EntriesIt->SymbolName;
|
||||
Images.push_back(std::make_unique<DeviceImageManager>(std::move(OB)));
|
||||
DeviceImageManager &NewImageWrapper = *Images.back();
|
||||
|
||||
llvm::offloading::sycl::forEachSymbol(Symbols, [&](llvm::StringRef Name) {
|
||||
auto It = MDeviceKernelInfoMap.find(std::string_view(Name));
|
||||
if (It == MDeviceKernelInfoMap.end()) {
|
||||
|
||||
[[maybe_unused]] auto [Iterator, EmplaceSucceeded] =
|
||||
MDeviceKernelInfoMap.emplace(
|
||||
std::piecewise_construct, std::forward_as_tuple(Name),
|
||||
std::forward_as_tuple(Name, *NewImageWrapper));
|
||||
std::piecewise_construct,
|
||||
std::forward_as_tuple(std::string_view(Name)),
|
||||
std::forward_as_tuple(std::string_view(Name), NewImageWrapper));
|
||||
assert(EmplaceSucceeded && "Kernel name found in multiple images");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
MDeviceImageManagers.insert(
|
||||
std::make_pair(&RawDeviceImage, std::move(NewImageWrapper)));
|
||||
}
|
||||
[[maybe_unused]] auto [It, Inserted] =
|
||||
MDeviceImageManagers.emplace(BinaryStart, std::move(Images));
|
||||
assert(Inserted && "Fat binary registered twice");
|
||||
}
|
||||
|
||||
void ProgramAndKernelManager::unregisterFatBin(
|
||||
__sycl_tgt_bin_desc *FatbinDesc) {
|
||||
assert(FatbinDesc && "Device images descriptor can't be nullptr");
|
||||
|
||||
if (!checkFatBinVersion(*FatbinDesc) || FatbinDesc->NumDeviceBinaries == 0)
|
||||
return;
|
||||
void ProgramAndKernelManager::unregisterFatBin(const void *BinaryStart,
|
||||
size_t /*Size*/) {
|
||||
assert(BinaryStart && "Binary pointer can't be nullptr");
|
||||
|
||||
std::lock_guard<std::mutex> Guard(MDataCollectionMutex);
|
||||
for (uint16_t I = 0; I < FatbinDesc->NumDeviceBinaries; ++I) {
|
||||
const auto &RawDeviceImage = FatbinDesc->DeviceImages[I];
|
||||
auto It = MDeviceImageManagers.find(BinaryStart);
|
||||
if (It == MDeviceImageManagers.end())
|
||||
return;
|
||||
|
||||
auto DevImageIt = MDeviceImageManagers.find(&RawDeviceImage);
|
||||
if (DevImageIt == MDeviceImageManagers.end())
|
||||
continue;
|
||||
|
||||
const llvm::offloading::EntryTy *EntriesB = RawDeviceImage.EntriesBegin;
|
||||
const llvm::offloading::EntryTy *EntriesE = RawDeviceImage.EntriesEnd;
|
||||
// Ignore "empty" device image
|
||||
if (EntriesB == EntriesE)
|
||||
continue;
|
||||
|
||||
for (auto EntriesIt = EntriesB; EntriesIt != EntriesE; ++EntriesIt) {
|
||||
if (auto KernelIt = MDeviceKernelInfoMap.find(EntriesIt->SymbolName);
|
||||
for (auto &Image : It->second) {
|
||||
llvm::StringRef Symbols = Image->getOffloadBinary().getString("symbols");
|
||||
llvm::offloading::sycl::forEachSymbol(Symbols, [&](llvm::StringRef Name) {
|
||||
if (auto KernelIt = MDeviceKernelInfoMap.find(std::string_view(Name));
|
||||
KernelIt != MDeviceKernelInfoMap.end()) {
|
||||
// Programs are attached to image and will be released with image
|
||||
// Programs are attached to the image and will be released with image
|
||||
// destruction. Clear only kernel specific data by destroying its kernel
|
||||
// info object.
|
||||
MDeviceKernelInfoMap.erase(KernelIt);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
MDeviceImageManagers.erase(DevImageIt);
|
||||
}
|
||||
MDeviceImageManagers.erase(It);
|
||||
}
|
||||
|
||||
static bool isImageCompatible(const DeviceImageManager &Image,
|
||||
const DeviceImpl &Device) {
|
||||
sycl::backend BE = Device.getBackend();
|
||||
const char *Target = Image.getRawData().TripleString;
|
||||
|
||||
if (!(strcmp(Target, DeviceBinaryTripleSPIRV64) == 0 &&
|
||||
BE == sycl::backend::level_zero))
|
||||
const llvm::object::OffloadBinary &OB = Image.getOffloadBinary();
|
||||
if (!(OB.getTriple() == DeviceBinaryTripleSPIRV64 &&
|
||||
Device.getBackend() == sycl::backend::level_zero))
|
||||
return false;
|
||||
|
||||
bool IsValid{};
|
||||
callAndThrow(olIsValidBinary, Device.getOLHandle(),
|
||||
Image.getRawData().ImageStart, Image.getSize(), &IsValid);
|
||||
llvm::StringRef ImageBytes = OB.getImage();
|
||||
callAndThrow(olIsValidBinary, Device.getOLHandle(), ImageBytes.data(),
|
||||
ImageBytes.size(), &IsValid);
|
||||
return IsValid;
|
||||
}
|
||||
|
||||
@@ -152,14 +135,14 @@ ProgramAndKernelManager::getOrCreateKernel(DeviceKernelInfo &KernelInfo,
|
||||
} // namespace detail
|
||||
_LIBSYCL_END_NAMESPACE_SYCL
|
||||
|
||||
extern "C" _LIBSYCL_EXPORT void
|
||||
__sycl_register_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc) {
|
||||
extern "C" _LIBSYCL_EXPORT void __sycl_register_lib(const void *BinaryStart,
|
||||
size_t Size) {
|
||||
sycl::detail::ProgramAndKernelManager::getInstance().registerFatBin(
|
||||
FatbinDesc);
|
||||
BinaryStart, Size);
|
||||
}
|
||||
|
||||
extern "C" _LIBSYCL_EXPORT void
|
||||
__sycl_unregister_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc) {
|
||||
extern "C" _LIBSYCL_EXPORT void __sycl_unregister_lib(const void *BinaryStart,
|
||||
size_t Size) {
|
||||
sycl::detail::ProgramAndKernelManager::getInstance().unregisterFatBin(
|
||||
FatbinDesc);
|
||||
BinaryStart, Size);
|
||||
}
|
||||
|
||||
@@ -21,23 +21,32 @@
|
||||
#include <detail/device_image_wrapper.hpp>
|
||||
#include <detail/device_kernel_info.hpp>
|
||||
|
||||
#include <llvm/Object/OffloadBinary.h>
|
||||
|
||||
#include <OffloadAPI.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
// +++ Entry points referenced by the offload wrapper object {
|
||||
|
||||
/// Executed as a part of a module's (.exe, .dll) static initialization.
|
||||
/// Registers device executable images with the runtime.
|
||||
extern "C" _LIBSYCL_EXPORT void
|
||||
__sycl_register_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc);
|
||||
/// \param BinaryStart pointer to the start of the OffloadBinary.
|
||||
/// \param Size size in bytes of the OffloadBinary.
|
||||
extern "C" _LIBSYCL_EXPORT void __sycl_register_lib(const void *BinaryStart,
|
||||
size_t Size);
|
||||
|
||||
/// Executed as a part of current module's (.exe, .dll) static
|
||||
/// de-initialization.
|
||||
/// Unregisters device executable images with the runtime.
|
||||
extern "C" _LIBSYCL_EXPORT void
|
||||
__sycl_unregister_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc);
|
||||
/// \param BinaryStart pointer to the start of the OffloadBinary.
|
||||
/// \param Size size in bytes of the OffloadBinary.
|
||||
/// BinaryStart and Size must match the values passed to the corresponding
|
||||
/// __sycl_register_lib call.
|
||||
extern "C" _LIBSYCL_EXPORT void __sycl_unregister_lib(const void *BinaryStart,
|
||||
size_t Size);
|
||||
|
||||
// +++ }
|
||||
|
||||
@@ -56,20 +65,16 @@ public:
|
||||
return PM;
|
||||
}
|
||||
|
||||
/// Parses raw device images data and prepares internal structures for
|
||||
/// effective kernel/program creation.
|
||||
/// \param FatbinDesc a record of all the device code that may be offloaded,
|
||||
/// generated by compiler and offloading tools.
|
||||
/// \throw sycl::exception with sycl::errc::runtime if a device image
|
||||
/// descriptor has an incompatible version or if a device image has an
|
||||
/// incompatible version, target or kind.
|
||||
void registerFatBin(__sycl_tgt_bin_desc *FatbinDesc);
|
||||
/// Parses the OffloadBinary of the given Size starting at BinaryStart and
|
||||
/// prepares internal structures for effective kernel/program creation.
|
||||
/// \throw sycl::exception with sycl::errc::runtime if parsing fails or if
|
||||
/// the binary has an incompatible kind or target.
|
||||
void registerFatBin(const void *BinaryStart, size_t Size);
|
||||
|
||||
/// Removes all entries of the data in FatbinDesc from internal structures.
|
||||
/// \param FatbinDesc a record of all the device code that may be offloaded,
|
||||
/// generated by compiler and offloading tools. Must match the pointer and
|
||||
/// data passed to registerFatBin.
|
||||
void unregisterFatBin(__sycl_tgt_bin_desc *FatbinDesc);
|
||||
/// Removes all entries associated with the fat binary that was previously
|
||||
/// passed to registerFatBin. BinaryStart and Size must match the values
|
||||
/// passed to the corresponding registerFatBin call.
|
||||
void unregisterFatBin(const void *BinaryStart, size_t Size);
|
||||
|
||||
/// Creates a liboffload kernel that is ready for execution.
|
||||
/// This method is thread-safe.
|
||||
@@ -92,9 +97,14 @@ private:
|
||||
// by caching the pointers when possible.
|
||||
std::unordered_map<std::string_view, DeviceKernelInfo> MDeviceKernelInfoMap;
|
||||
|
||||
// Controls lifetime of device images.
|
||||
std::unordered_map<const __sycl_tgt_device_image *,
|
||||
std::unique_ptr<DeviceImageManager>>
|
||||
// Keyed by BinaryStart (register/unregister param). Each fat binary can
|
||||
// contain multiple device images, each owned by its own DeviceImageManager.
|
||||
// Controls lifetime of device image managers and, through them, parsed
|
||||
// OffloadBinary objects.
|
||||
using BinaryStartKey = const void *;
|
||||
using DeviceImageManagerVec =
|
||||
std::vector<std::unique_ptr<DeviceImageManager>>;
|
||||
std::unordered_map<BinaryStartKey, DeviceImageManagerVec>
|
||||
MDeviceImageManagers;
|
||||
|
||||
// All work with device images and data related to it must be wrapped with a
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
@@ -176,6 +178,43 @@ LLVM_ABI Error containerizeImage(std::unique_ptr<MemoryBuffer> &Binary,
|
||||
int32_t ImageFlags,
|
||||
MapVector<StringRef, StringRef> &MetaData);
|
||||
|
||||
namespace sycl {
|
||||
|
||||
/// Serialized symbol table stored in the "symbols" entry of a SYCL
|
||||
/// OffloadBinary. The in-memory layout of the blob is:
|
||||
/// [ SymbolTableHeader ]
|
||||
/// [ SymbolTableEntry Entries[N] ] -- N == Header.Count
|
||||
/// [ char StringData[] ] -- packed null-terminated names
|
||||
/// Use writeSymbolTable() to produce the blob and forEachSymbol() to consume
|
||||
/// it; both encapsulate all pointer arithmetic.
|
||||
|
||||
struct SymbolTableHeader {
|
||||
uint32_t Count; ///< Number of symbol entries.
|
||||
};
|
||||
struct SymbolTableEntry {
|
||||
uint32_t OffsetToSymbol; ///< Byte offset from blob start to the symbol name.
|
||||
uint32_t SymbolSize; ///< Length of the symbol name in bytes, excluding
|
||||
///< the null terminator.
|
||||
};
|
||||
|
||||
/// Serialize \p Names into \p Out.
|
||||
LLVM_ABI void writeSymbolTable(ArrayRef<StringRef> Names, SmallString<0> &Out);
|
||||
|
||||
/// Invoke \p Callback with a \c StringRef for each symbol in \p Symbols,
|
||||
/// the raw serialized symbol-table blob.
|
||||
template <typename Fn> void forEachSymbol(StringRef Symbols, Fn &&Callback) {
|
||||
assert(Symbols.size() >= sizeof(SymbolTableHeader) &&
|
||||
"symbols blob smaller than header");
|
||||
const char *Base = Symbols.data();
|
||||
const auto &Header = *reinterpret_cast<const SymbolTableHeader *>(Base);
|
||||
const auto *Entries = reinterpret_cast<const SymbolTableEntry *>(&Header + 1);
|
||||
for (uint32_t I = 0; I < Header.Count; ++I)
|
||||
Callback(
|
||||
StringRef(Base + Entries[I].OffsetToSymbol, Entries[I].SymbolSize));
|
||||
}
|
||||
|
||||
} // namespace sycl
|
||||
|
||||
namespace intel {
|
||||
/// Containerizes an OpenMP SPIR-V image into an OffloadBinary image.
|
||||
/// \param Binary The SPIR-V binary to containerize.
|
||||
|
||||
@@ -91,9 +91,9 @@ PointerType *getBinDescPtrTy(Module &M) {
|
||||
/// library. It is defined as follows
|
||||
///
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// extern __tgt_offload_entry *__start_omp_offloading_entries;
|
||||
/// extern __tgt_offload_entry *__start_llvm_offload_entries;
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
|
||||
/// extern __tgt_offload_entry *__stop_llvm_offload_entries;
|
||||
///
|
||||
/// static const char Image0[] = { <Bufs.front() contents> };
|
||||
/// ...
|
||||
@@ -103,23 +103,23 @@ PointerType *getBinDescPtrTy(Module &M) {
|
||||
/// {
|
||||
/// Image0, /*ImageStart*/
|
||||
/// Image0 + sizeof(Image0), /*ImageEnd*/
|
||||
/// __start_omp_offloading_entries, /*EntriesBegin*/
|
||||
/// __stop_omp_offloading_entries /*EntriesEnd*/
|
||||
/// __start_llvm_offload_entries, /*EntriesBegin*/
|
||||
/// __stop_llvm_offload_entries /*EntriesEnd*/
|
||||
/// },
|
||||
/// ...
|
||||
/// {
|
||||
/// ImageN, /*ImageStart*/
|
||||
/// ImageN + sizeof(ImageN), /*ImageEnd*/
|
||||
/// __start_omp_offloading_entries, /*EntriesBegin*/
|
||||
/// __stop_omp_offloading_entries /*EntriesEnd*/
|
||||
/// __start_llvm_offload_entries, /*EntriesBegin*/
|
||||
/// __stop_llvm_offload_entries /*EntriesEnd*/
|
||||
/// }
|
||||
/// };
|
||||
///
|
||||
/// static const __tgt_bin_desc BinDesc = {
|
||||
/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
|
||||
/// Images, /*DeviceImages*/
|
||||
/// __start_omp_offloading_entries, /*HostEntriesBegin*/
|
||||
/// __stop_omp_offloading_entries /*HostEntriesEnd*/
|
||||
/// __start_llvm_offload_entries, /*HostEntriesBegin*/
|
||||
/// __stop_llvm_offload_entries /*HostEntriesEnd*/
|
||||
/// };
|
||||
///
|
||||
/// Global variable that represents BinDesc is returned.
|
||||
@@ -629,384 +629,75 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
|
||||
class SYCLWrapper {
|
||||
public:
|
||||
SYCLWrapper(Module &M, const SYCLJITOptions &Options)
|
||||
: M(M), C(M.getContext()), Options(Options) {
|
||||
EntryTy = offloading::getEntryTy(M);
|
||||
SyclDeviceImageTy = getSyclDeviceImageTy();
|
||||
SyclBinDescTy = getSyclBinDescTy();
|
||||
: M(M), C(M.getContext()), Options(Options) {}
|
||||
|
||||
/// Embeds \p Buffer (a raw OffloadBinary) as a global constant and returns
|
||||
/// a pair of (Start, Size), where Start points to the beginning of the
|
||||
/// embedded data and Size is its length in bytes.
|
||||
std::pair<Constant *, Constant *> embedBinary(ArrayRef<char> Buffer) {
|
||||
Constant *Arr = ConstantDataArray::get(C, Buffer);
|
||||
GlobalVariable *BinaryGV = new GlobalVariable(
|
||||
M, Arr->getType(), /*isConstant=*/true, GlobalValue::InternalLinkage,
|
||||
Arr, ".sycl_offloading.binary");
|
||||
BinaryGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
||||
BinaryGV->setSection(".llvm.offloading");
|
||||
|
||||
IntegerType *Int64Ty = Type::getInt64Ty(C);
|
||||
Constant *Zero = ConstantInt::get(Int64Ty, 0);
|
||||
Constant *Size = ConstantInt::get(Int64Ty, Buffer.size());
|
||||
Constant *Start = ConstantExpr::getGetElementPtr(
|
||||
BinaryGV->getValueType(), BinaryGV, ArrayRef<Constant *>{Zero, Zero});
|
||||
return {Start, Size};
|
||||
}
|
||||
|
||||
/// Creates binary descriptor for the given device images. Binary descriptor
|
||||
/// is an object that is passed to the offloading runtime at program startup
|
||||
/// and it describes all device images available in the executable or shared
|
||||
/// library. It is defined as follows:
|
||||
///
|
||||
/// \code
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// __tgt_offload_entry *__sycl_offload_entries_arr0[];
|
||||
/// ...
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// __tgt_offload_entry *__sycl_offload_entries_arrN[];
|
||||
///
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// extern const char *CompileOptions = "...";
|
||||
/// ...
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// extern const char *LinkOptions = "...";
|
||||
/// ...
|
||||
///
|
||||
/// static const char Image0[] = { ... };
|
||||
/// ...
|
||||
/// static const char ImageN[] = { ... };
|
||||
///
|
||||
/// static const __sycl.tgt_device_image Images[] = {
|
||||
/// {
|
||||
/// Version, // Version
|
||||
/// OffloadKind, // OffloadKind
|
||||
/// Format, // Format of the image.
|
||||
// TripleString, // Arch
|
||||
/// CompileOptions, // CompileOptions
|
||||
/// LinkOptions, // LinkOptions
|
||||
/// Image0, // ImageStart
|
||||
/// Image0 + IMAGE0_SIZE, // ImageEnd
|
||||
/// __sycl_offload_entries_arr0, // EntriesBegin
|
||||
/// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
|
||||
/// NULL, // PropertiesBegin
|
||||
/// NULL, // PropertiesEnd
|
||||
/// },
|
||||
/// ...
|
||||
/// };
|
||||
///
|
||||
/// static const __sycl.tgt_bin_desc FatbinDesc = {
|
||||
/// Version, //Version
|
||||
/// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
|
||||
/// Images, //DeviceImages
|
||||
/// NULL, //HostEntriesBegin
|
||||
/// NULL //HostEntriesEnd
|
||||
/// };
|
||||
/// \endcode
|
||||
///
|
||||
/// \returns Global variable that represents FatbinDesc.
|
||||
GlobalVariable *createFatbinDesc(ArrayRef<OffloadFile> OffloadFiles) {
|
||||
StringRef OffloadKindTag = ".sycl_offloading.";
|
||||
SmallVector<Constant *> WrappedImages;
|
||||
WrappedImages.reserve(OffloadFiles.size());
|
||||
for (size_t I = 0, E = OffloadFiles.size(); I != E; ++I)
|
||||
WrappedImages.push_back(
|
||||
wrapImage(*OffloadFiles[I].getBinary(), Twine(I), OffloadKindTag));
|
||||
|
||||
return combineWrappedImages(WrappedImages, OffloadKindTag);
|
||||
}
|
||||
|
||||
void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
|
||||
void createRegisterFatbinFunction(Constant *Start, Constant *Size) {
|
||||
FunctionType *FuncTy =
|
||||
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
Twine("sycl") + ".descriptor_reg", &M);
|
||||
Func->setSection(".text.startup");
|
||||
|
||||
// Get RegFuncName function declaration.
|
||||
PointerType *PtrTy = PointerType::getUnqual(C);
|
||||
IntegerType *Int64Ty = Type::getInt64Ty(C);
|
||||
FunctionType *RegFuncTy =
|
||||
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
|
||||
FunctionType::get(Type::getVoidTy(C), {PtrTy, Int64Ty},
|
||||
/*isVarArg=*/false);
|
||||
FunctionCallee RegFuncC =
|
||||
M.getOrInsertFunction("__sycl_register_lib", RegFuncTy);
|
||||
|
||||
// Construct function body.
|
||||
IRBuilder Builder(BasicBlock::Create(C, "entry", Func));
|
||||
Builder.CreateCall(RegFuncC, FatbinDesc);
|
||||
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
|
||||
Builder.CreateCall(RegFuncC, {Start, Size});
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
// Add this function to constructors.
|
||||
appendToGlobalCtors(M, Func, /*Priority*/ 1);
|
||||
}
|
||||
|
||||
void createUnregisterFunction(GlobalVariable *FatbinDesc) {
|
||||
void createUnregisterFunction(Constant *Start, Constant *Size) {
|
||||
FunctionType *FuncTy =
|
||||
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
"sycl.descriptor_unreg", &M);
|
||||
Func->setSection(".text.startup");
|
||||
|
||||
// Get UnregFuncName function declaration.
|
||||
PointerType *PtrTy = PointerType::getUnqual(C);
|
||||
IntegerType *Int64Ty = Type::getInt64Ty(C);
|
||||
FunctionType *UnRegFuncTy =
|
||||
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
|
||||
FunctionType::get(Type::getVoidTy(C), {PtrTy, Int64Ty},
|
||||
/*isVarArg=*/false);
|
||||
FunctionCallee UnRegFuncC =
|
||||
M.getOrInsertFunction("__sycl_unregister_lib", UnRegFuncTy);
|
||||
|
||||
// Construct function body
|
||||
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
|
||||
Builder.CreateCall(UnRegFuncC, FatbinDesc);
|
||||
Builder.CreateCall(UnRegFuncC, {Start, Size});
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
// Add this function to global destructors.
|
||||
appendToGlobalDtors(M, Func, /*Priority*/ 1);
|
||||
}
|
||||
|
||||
private:
|
||||
IntegerType *getSizeTTy() {
|
||||
switch (M.getDataLayout().getPointerSize()) {
|
||||
case 4:
|
||||
return Type::getInt32Ty(C);
|
||||
case 8:
|
||||
return Type::getInt64Ty(C);
|
||||
}
|
||||
llvm_unreachable("unsupported pointer type size");
|
||||
}
|
||||
|
||||
SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
|
||||
IntegerType *SizeTTy = getSizeTTy();
|
||||
return SmallVector<Constant *, 2>{ConstantInt::get(SizeTTy, First),
|
||||
ConstantInt::get(SizeTTy, Second)};
|
||||
}
|
||||
|
||||
/// Note: Properties aren't supported and the support is going
|
||||
/// to be added later.
|
||||
/// Creates a structure corresponding to:
|
||||
/// SYCL specific image descriptor type.
|
||||
/// \code
|
||||
/// struct __sycl.tgt_device_image {
|
||||
/// // Version of this structure - for backward compatibility;
|
||||
/// // all modifications which change order/type/offsets of existing fields
|
||||
/// // should increment the version.
|
||||
/// uint16_t Version;
|
||||
/// // The kind of offload model the image employs.
|
||||
/// uint8_t OffloadKind;
|
||||
/// // Format of the image data - SPIRV, LLVMIR bitcode, etc.
|
||||
/// uint8_t Format;
|
||||
/// // Null-terminated string representation of the device's target
|
||||
/// // architecture.
|
||||
/// const char *Arch;
|
||||
/// // A null-terminated string; target- and compiler-specific options
|
||||
/// // which are passed to the device compiler at runtime.
|
||||
/// const char *CompileOptions;
|
||||
/// // A null-terminated string; target- and compiler-specific options
|
||||
/// // which are passed to the device linker at runtime.
|
||||
/// const char *LinkOptions;
|
||||
/// // Pointer to the device binary image start.
|
||||
/// void *ImageStart;
|
||||
/// // Pointer to the device binary image end.
|
||||
/// void *ImageEnd;
|
||||
/// // The entry table.
|
||||
/// __tgt_offload_entry *EntriesBegin;
|
||||
/// __tgt_offload_entry *EntriesEnd;
|
||||
/// const char *PropertiesBegin;
|
||||
/// const char *PropertiesEnd;
|
||||
/// };
|
||||
/// \endcode
|
||||
StructType *getSyclDeviceImageTy() {
|
||||
return StructType::create(
|
||||
{
|
||||
Type::getInt16Ty(C), // Version
|
||||
Type::getInt8Ty(C), // OffloadKind
|
||||
Type::getInt8Ty(C), // Format
|
||||
PointerType::getUnqual(C), // Arch
|
||||
PointerType::getUnqual(C), // CompileOptions
|
||||
PointerType::getUnqual(C), // LinkOptions
|
||||
PointerType::getUnqual(C), // ImageStart
|
||||
PointerType::getUnqual(C), // ImageEnd
|
||||
PointerType::getUnqual(C), // EntriesBegin
|
||||
PointerType::getUnqual(C), // EntriesEnd
|
||||
PointerType::getUnqual(C), // PropertiesBegin
|
||||
PointerType::getUnqual(C) // PropertiesEnd
|
||||
},
|
||||
"__sycl.tgt_device_image");
|
||||
}
|
||||
|
||||
/// Creates a structure for SYCL specific binary descriptor type. Corresponds
|
||||
/// to:
|
||||
///
|
||||
/// \code
|
||||
/// struct __sycl.tgt_bin_desc {
|
||||
/// // version of this structure - for backward compatibility;
|
||||
/// // all modifications which change order/type/offsets of existing fields
|
||||
/// // should increment the version.
|
||||
/// uint16_t Version;
|
||||
/// uint16_t NumDeviceImages;
|
||||
/// __sycl.tgt_device_image *DeviceImages;
|
||||
/// // the offload entry table
|
||||
/// __tgt_offload_entry *HostEntriesBegin;
|
||||
/// __tgt_offload_entry *HostEntriesEnd;
|
||||
/// };
|
||||
/// \endcode
|
||||
StructType *getSyclBinDescTy() {
|
||||
return StructType::create(
|
||||
{Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
|
||||
PointerType::getUnqual(C), PointerType::getUnqual(C)},
|
||||
"__sycl.tgt_bin_desc");
|
||||
}
|
||||
|
||||
/// Adds a global readonly variable that is initialized by given
|
||||
/// \p Initializer to the module.
|
||||
GlobalVariable *addGlobalArrayVariable(const Twine &Name,
|
||||
ArrayRef<char> Initializer,
|
||||
const Twine &Section = "") {
|
||||
Constant *Arr = ConstantDataArray::get(M.getContext(), Initializer);
|
||||
GlobalVariable *Var =
|
||||
new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
|
||||
GlobalVariable::InternalLinkage, Arr, Name);
|
||||
Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
||||
|
||||
SmallVector<char, 32> NameBuf;
|
||||
StringRef SectionName = Section.toStringRef(NameBuf);
|
||||
if (!SectionName.empty())
|
||||
Var->setSection(SectionName);
|
||||
return Var;
|
||||
}
|
||||
|
||||
/// Adds given \p Buf as a global variable into the module.
|
||||
/// \returns Pair of pointers that point at the beginning and the end of the
|
||||
/// variable.
|
||||
std::pair<Constant *, Constant *>
|
||||
addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
|
||||
const Twine &Section = "") {
|
||||
GlobalVariable *Var = addGlobalArrayVariable(Name, Buf, Section);
|
||||
Constant *ImageB = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
|
||||
getSizetConstPair(0, 0));
|
||||
Constant *ImageE = ConstantExpr::getGetElementPtr(
|
||||
Var->getValueType(), Var, getSizetConstPair(0, Buf.size()));
|
||||
return std::make_pair(ImageB, ImageE);
|
||||
}
|
||||
|
||||
/// Adds given \p Data as constant byte array in the module.
|
||||
/// \returns Constant pointer to the added data. The pointer type does not
|
||||
/// carry size information.
|
||||
Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
|
||||
GlobalVariable *Var = addGlobalArrayVariable(Name, Data);
|
||||
Constant *DataPtr = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
|
||||
getSizetConstPair(0, 0));
|
||||
return DataPtr;
|
||||
}
|
||||
|
||||
/// Creates a global variable of const char* type and creates an
|
||||
/// initializer that initializes it with \p Str.
|
||||
///
|
||||
/// \returns Link-time constant pointer (constant expr) to that
|
||||
/// variable.
|
||||
Constant *addStringToModule(StringRef Str, const Twine &Name) {
|
||||
Constant *Arr = ConstantDataArray::getString(C, Str);
|
||||
GlobalVariable *Var =
|
||||
new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
|
||||
GlobalVariable::InternalLinkage, Arr, Name);
|
||||
Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
||||
return Var;
|
||||
}
|
||||
|
||||
/// Each image contains its own set of symbols, which may contain different
|
||||
/// symbols than other images. This function constructs an array of
|
||||
/// symbol entries for a particular image.
|
||||
///
|
||||
/// \returns Pointers to the beginning and end of the array.
|
||||
std::pair<Constant *, Constant *>
|
||||
initOffloadEntriesPerImage(StringRef Entries, const Twine &OffloadKindTag) {
|
||||
SmallVector<Constant *> EntriesInits;
|
||||
const char *Current = Entries.data();
|
||||
const char *End = Current + Entries.size();
|
||||
while (Current < End) {
|
||||
StringRef Name(Current);
|
||||
Current += Name.size() + 1;
|
||||
|
||||
if (Name.empty())
|
||||
continue;
|
||||
|
||||
GlobalVariable *GV = emitOffloadingEntry(
|
||||
M, /*Kind*/ OffloadKind::OFK_SYCL,
|
||||
Constant::getNullValue(PointerType::getUnqual(C)), Name, /*Size*/ 0,
|
||||
/*Flags*/ 0, /*Data*/ 0);
|
||||
EntriesInits.push_back(GV->getInitializer());
|
||||
}
|
||||
|
||||
Constant *Arr = ConstantArray::get(
|
||||
ArrayType::get(EntryTy, EntriesInits.size()), EntriesInits);
|
||||
GlobalVariable *EntriesGV = new GlobalVariable(
|
||||
M, Arr->getType(), /*isConstant*/ true, GlobalVariable::InternalLinkage,
|
||||
Arr, OffloadKindTag + "entries_arr");
|
||||
|
||||
Constant *EntriesB = ConstantExpr::getGetElementPtr(
|
||||
EntriesGV->getValueType(), EntriesGV, getSizetConstPair(0, 0));
|
||||
Constant *EntriesE = ConstantExpr::getGetElementPtr(
|
||||
EntriesGV->getValueType(), EntriesGV,
|
||||
getSizetConstPair(0, EntriesInits.size()));
|
||||
return std::make_pair(EntriesB, EntriesE);
|
||||
}
|
||||
|
||||
Constant *wrapImage(const OffloadBinary &OB, const Twine &ImageID,
|
||||
StringRef OffloadKindTag) {
|
||||
// Note: Intel DPC++ compiler had 2 versions of this structure
|
||||
// and clang++ has a third different structure. To avoid ABI incompatibility
|
||||
// between generated device images the Version here starts from 3.
|
||||
constexpr uint16_t DeviceImageStructVersion = 3;
|
||||
Constant *Version =
|
||||
ConstantInt::get(Type::getInt16Ty(C), DeviceImageStructVersion);
|
||||
Constant *OffloadKindConstant = ConstantInt::get(
|
||||
Type::getInt8Ty(C), static_cast<uint8_t>(OB.getOffloadKind()));
|
||||
Constant *ImageKindConstant = ConstantInt::get(
|
||||
Type::getInt8Ty(C), static_cast<uint8_t>(OB.getImageKind()));
|
||||
StringRef Triple = OB.getString("triple");
|
||||
Constant *TripleConstant =
|
||||
addStringToModule(Triple, Twine(OffloadKindTag) + "target." + ImageID);
|
||||
Constant *CompileOptions =
|
||||
addStringToModule(Options.CompileOptions,
|
||||
Twine(OffloadKindTag) + "opts.compile." + ImageID);
|
||||
Constant *LinkOptions = addStringToModule(
|
||||
Options.LinkOptions, Twine(OffloadKindTag) + "opts.link." + ImageID);
|
||||
|
||||
// Note: NULL for now.
|
||||
std::pair<Constant *, Constant *> PropertiesConstants = {
|
||||
Constant::getNullValue(PointerType::getUnqual(C)),
|
||||
Constant::getNullValue(PointerType::getUnqual(C))};
|
||||
|
||||
StringRef RawImage = OB.getImage();
|
||||
std::pair<Constant *, Constant *> Binary = addArrayToModule(
|
||||
ArrayRef<char>(RawImage.begin(), RawImage.end()),
|
||||
Twine(OffloadKindTag) + ImageID + ".data", ".llvm.offloading");
|
||||
|
||||
// For SYCL images offload entries are defined here per image.
|
||||
std::pair<Constant *, Constant *> ImageEntriesPtrs =
|
||||
initOffloadEntriesPerImage(OB.getString("symbols"), OffloadKindTag);
|
||||
|
||||
// .first and .second arguments below correspond to start and end pointers
|
||||
// respectively.
|
||||
Constant *WrappedBinary = ConstantStruct::get(
|
||||
SyclDeviceImageTy, Version, OffloadKindConstant, ImageKindConstant,
|
||||
TripleConstant, CompileOptions, LinkOptions, Binary.first,
|
||||
Binary.second, ImageEntriesPtrs.first, ImageEntriesPtrs.second,
|
||||
PropertiesConstants.first, PropertiesConstants.second);
|
||||
|
||||
return WrappedBinary;
|
||||
}
|
||||
|
||||
GlobalVariable *combineWrappedImages(ArrayRef<Constant *> WrappedImages,
|
||||
StringRef OffloadKindTag) {
|
||||
Constant *ImagesData = ConstantArray::get(
|
||||
ArrayType::get(SyclDeviceImageTy, WrappedImages.size()), WrappedImages);
|
||||
GlobalVariable *ImagesGV =
|
||||
new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
|
||||
GlobalValue::InternalLinkage, ImagesData,
|
||||
Twine(OffloadKindTag) + "device_images");
|
||||
ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
||||
|
||||
Constant *EntriesB = Constant::getNullValue(PointerType::getUnqual(C));
|
||||
Constant *EntriesE = Constant::getNullValue(PointerType::getUnqual(C));
|
||||
static constexpr uint16_t BinDescStructVersion = 1;
|
||||
Constant *DescInit = ConstantStruct::get(
|
||||
SyclBinDescTy,
|
||||
ConstantInt::get(Type::getInt16Ty(C), BinDescStructVersion),
|
||||
ConstantInt::get(Type::getInt16Ty(C), WrappedImages.size()), ImagesGV,
|
||||
EntriesB, EntriesE);
|
||||
|
||||
return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
|
||||
GlobalValue::InternalLinkage, DescInit,
|
||||
Twine(OffloadKindTag) + "descriptor");
|
||||
}
|
||||
|
||||
Module &M;
|
||||
LLVMContext &C;
|
||||
SYCLJITOptions Options;
|
||||
|
||||
StructType *EntryTy = nullptr;
|
||||
StructType *SyclDeviceImageTy = nullptr;
|
||||
StructType *SyclBinDescTy = nullptr;
|
||||
}; // end of SYCLWrapper
|
||||
|
||||
} // namespace
|
||||
@@ -1053,18 +744,8 @@ Error offloading::wrapHIPBinary(Module &M, ArrayRef<char> Image,
|
||||
Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M, ArrayRef<char> Buffer,
|
||||
SYCLJITOptions Options) {
|
||||
SYCLWrapper W(M, Options);
|
||||
MemoryBufferRef MBR(StringRef(Buffer.begin(), Buffer.size()),
|
||||
/*Identifier*/ "");
|
||||
SmallVector<OffloadFile> OffloadFiles;
|
||||
if (Error E = extractOffloadBinaries(MBR, OffloadFiles))
|
||||
return E;
|
||||
|
||||
GlobalVariable *Desc = W.createFatbinDesc(OffloadFiles);
|
||||
if (!Desc)
|
||||
return createStringError(inconvertibleErrorCode(),
|
||||
"No binary descriptors created.");
|
||||
|
||||
W.createRegisterFatbinFunction(Desc);
|
||||
W.createUnregisterFunction(Desc);
|
||||
auto [Start, Size] = W.embedBinary(Buffer);
|
||||
W.createRegisterFatbinFunction(Start, Size);
|
||||
W.createUnregisterFunction(Start, Size);
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::offloading;
|
||||
using namespace llvm::offloading::sycl;
|
||||
|
||||
StructType *offloading::getEntryTy(Module &M) {
|
||||
LLVMContext &C = M.getContext();
|
||||
@@ -424,3 +425,31 @@ Error offloading::intel::containerizeOpenMPSPIRVImage(
|
||||
object::OffloadKind::OFK_OpenMP, /*ImageFlags=*/0,
|
||||
MetaData);
|
||||
}
|
||||
|
||||
void sycl::writeSymbolTable(ArrayRef<StringRef> Names, SmallString<0> &Out) {
|
||||
uint32_t Count = Names.size();
|
||||
|
||||
// Compute the byte offset where string data begins: right after the header
|
||||
// and the entry array.
|
||||
uint32_t StringDataOffset =
|
||||
sizeof(SymbolTableHeader) + Count * sizeof(SymbolTableEntry);
|
||||
|
||||
// Pre-size the output to hold the header and entry array; string data is
|
||||
// appended below.
|
||||
Out.resize(StringDataOffset);
|
||||
|
||||
// Write the header.
|
||||
auto *Header = reinterpret_cast<SymbolTableHeader *>(Out.data());
|
||||
Header->Count = Count;
|
||||
|
||||
// Write each entry and append the corresponding null-terminated name.
|
||||
auto *Entries = reinterpret_cast<SymbolTableEntry *>(Header + 1);
|
||||
uint32_t CurrentOffset = StringDataOffset;
|
||||
for (uint32_t I = 0; I < Count; ++I) {
|
||||
Entries[I].OffsetToSymbol = CurrentOffset;
|
||||
Entries[I].SymbolSize = Names[I].size();
|
||||
Out.append(Names[I]);
|
||||
Out.push_back('\0');
|
||||
CurrentOffset += Names[I].size() + 1;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user