[Offload][libsycl][clang-sycl-linker] Simplify SYCL Offload wrapping (#193876)

Replace the __sycl_tgt_bin_desc/__sycl_tgt_device_image-based fat binary
registration with a simpler OffloadBinary-native approach:

- __sycl_register_lib/__sycl_unregister_lib now take (BinaryStart, Size)
instead of a __sycl_tgt_bin_desc pointer; __sycl_unregister_lib only
needs BinaryStart since the runtime looks up the binary by its start
address.
- OffloadWrapper's SYCL wrapping is significantly simplified: the
__tgt_bin_desc/__tgt_device_image structs and the descriptor
construction code are replaced by a single embedded OffloadBinary blob
passed directly to the register/unregister entry points.
- clang-sycl-linker generates a single OffloadBinary, which contains
multiple images.
- ProgramAndKernelManager::registerFatBin parses the blob via
OffloadBinary::create, keying MDeviceImageManagers by BinaryStart to
eliminate the reparse on unregister.
- DeviceImageManager owns std::unique_ptr<OffloadBinary> instead of
borrowing a __sycl_tgt_device_image pointer; getRawData renamed to
getOffloadBinary.
- Introduce forEachSymbol and SymbolTable serialization
(writeSymbolTable/ SymbolTableHeader/SymbolTableEntry) in Utility.h to
expose the kernel symbol table embedded in the OffloadBinary "symbols"
section for consumption by the SYCL runtime.
- device_binary_structures.hpp reduced to DeviceBinaryTripleSPIRV64.

Co-Authored-By: Claude
This commit is contained in:
Yury Plyakhin
2026-04-27 14:43:33 -07:00
committed by GitHub
parent 1a805ace43
commit b862554a70
10 changed files with 229 additions and 559 deletions

View File

@@ -273,27 +273,16 @@
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=SYCL
// SYCL: %__sycl.tgt_device_image = type { i16, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
// SYCL-NEXT: %__sycl.tgt_bin_desc = type { i16, i16, ptr, ptr, ptr }
// SYCL: @.sycl_offloading.target.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
// SYCL-NEXT: @.sycl_offloading.opts.compile.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
// SYCL-NEXT: @.sycl_offloading.opts.link.0 = internal unnamed_addr constant [1 x i8] zeroinitializer
// SYCL-NEXT: @.sycl_offloading.0.data = internal unnamed_addr constant [0 x i8] zeroinitializer, section ".llvm.offloading"
// SYCL-NEXT: @.offloading.entry_name = internal unnamed_addr constant [5 x i8] c"stub\00", section ".llvm.rodata.offloading", align 1
// SYCL-NEXT: @.offloading.entry.stub = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
// SYCL-NEXT: @.sycl_offloading.entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 8, i32 0, ptr null, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }]
// SYCL-NEXT: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 3, i8 8, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr @.sycl_offloading.0.data, ptr @.sycl_offloading.0.data, ptr @.sycl_offloading.entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @.sycl_offloading.entries_arr, i64 0, i64 1), ptr null, ptr null }]
// SYCL-NEXT: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null }
// SYCL: @.sycl_offloading.binary = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"{{.*}}", section ".llvm.offloading"
// SYCL: define internal void @sycl.descriptor_reg() section ".text.startup" {
// SYCL-NEXT: entry:
// SYCL-NEXT: call void @__sycl_register_lib(ptr @.sycl_offloading.descriptor)
// SYCL-NEXT: call void @__sycl_register_lib(ptr @.sycl_offloading.binary, i64 [[SIZE]])
// SYCL-NEXT: ret void
// SYCL-NEXT: }
// SYCL: define internal void @sycl.descriptor_unreg() section ".text.startup" {
// SYCL-NEXT: entry:
// SYCL-NEXT: call void @__sycl_unregister_lib(ptr @.sycl_offloading.descriptor)
// SYCL-NEXT: call void @__sycl_unregister_lib(ptr @.sycl_offloading.binary, i64 [[SIZE]])
// SYCL-NEXT: ret void
// SYCL-NEXT: }

View File

@@ -22,6 +22,7 @@
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IRReader/IRReader.h"
@@ -35,6 +36,7 @@
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/InitLLVM.h"
@@ -487,14 +489,14 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
if (!ModOrErr)
return ModOrErr.takeError();
SmallString<0> SymbolData;
SmallVector<StringRef> KernelNames;
for (Function &F : **ModOrErr) {
// TODO: Consider using LLVM-IR metadata to identify globals of interest
if (F.hasKernelCallingConv()) {
SymbolData.append(F.getName());
SymbolData.push_back('\0');
}
if (F.hasKernelCallingConv())
KernelNames.push_back(F.getName());
}
SmallString<0> SymbolData;
llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData);
SymbolTable.emplace_back(std::move(SymbolData));
}
@@ -520,13 +522,11 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
}
}
// Write the final output into file.
int FD = -1;
if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
return errorCodeToError(EC);
llvm::raw_fd_ostream FS(FD, /*shouldClose=*/true);
// Collect all images to be packed into a single OffloadBinary.
SmallVector<OffloadingImage> Images;
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
if (SymbolTable[I].empty())
continue;
auto File = SplitModules[I];
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(File);
@@ -545,13 +545,18 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ));
TheImage.StringData["symbols"] = SymbolTable[I];
TheImage.Image = std::move(*FileOrErr);
Images.emplace_back(std::move(TheImage));
}
llvm::SmallString<0> Buffer = OffloadBinary::write(TheImage);
llvm::SmallString<0> Buffer = OffloadBinary::write(Images);
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
return createStringError("Offload binary has invalid size alignment");
FS << Buffer;
}
return Error::success();
auto OutputOrErr = FileOutputBuffer::create(OutputFile, Buffer.size());
if (!OutputOrErr)
return OutputOrErr.takeError();
llvm::copy(Buffer, (*OutputOrErr)->getBufferStart());
return (*OutputOrErr)->commit();
}
} // namespace

View File

@@ -17,11 +17,6 @@
#include <sycl/__impl/detail/config.hpp>
#include <llvm/Frontend/Offloading/Utility.h>
#include <llvm/Object/OffloadBinary.h>
#include <cstdint>
_LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
@@ -35,60 +30,6 @@ namespace detail {
/// SPIR-V with 64-bit pointers.
static constexpr char DeviceBinaryTripleSPIRV64[] = "spirv64-unknown-unknown";
/// Device binary descriptor version supported by this library.
static constexpr uint16_t SupportedDeviceBinaryVersion = 3;
/// This struct is a record of the device binary information.
/// It must match the __tgt_device_image structure generated by the
/// compiler when their `Version` fields match.
struct __sycl_tgt_device_image {
uint16_t Version;
/// The type of offload model the binary employs. See `OffloadKind`. Only
/// OFK_SYCL is supported by libsycl.
uint8_t OffloadKind;
/// Format of the binary data, see `ImageKind`.
uint8_t ImageFormat;
/// A null-terminated string representation of the device's target
/// architecture. Must hold one of _LIBSYCL_DEVICE_BINARY_TARGET_* values.
const char *TripleString;
/// A null-terminated string of target- and compiler-specific options
/// that are suggested to use to "compile" program at runtime.
const char *CompileOptions;
/// A null-terminated string of target- and compiler-specific options
/// that are suggested to use to "link" program at runtime.
const char *LinkOptions;
/// Pointer to the target code start.
const unsigned char *ImageStart;
/// Pointer to the target code end.
const unsigned char *ImageEnd;
/// The offload entry table
llvm::offloading::EntryTy *EntriesBegin;
llvm::offloading::EntryTy *EntriesEnd;
// TODO: properties are not supported now.
/// Array of property sets.
void *PropertiesBegin;
void *PropertiesEnd;
};
/// Version of offload binaries descriptor `__sycl_tgt_bin_desc` supported by
/// libsycl.
static constexpr uint16_t SupportedOffloadBinaryVersion = 1;
/// This struct is a record of all the device code that may be offloaded.
/// It must match the `__tgt_bin_desc` structure generated by
/// the compiler when their `Version` fields match.
struct __sycl_tgt_bin_desc {
/// Version of the structure.
uint16_t Version;
/// Number of device binaries in this descriptor.
uint16_t NumDeviceBinaries;
/// Device binaries data.
__sycl_tgt_device_image *DeviceImages;
/// The offload entry table (not used, for compatibility with OpenMP).
llvm::offloading::EntryTy *HostEntriesBegin;
llvm::offloading::EntryTy *HostEntriesEnd;
};
} // namespace detail
_LIBSYCL_END_NAMESPACE_SYCL

View File

@@ -17,8 +17,8 @@ ProgramWrapper::ProgramWrapper(ol_device_handle_t Device,
DeviceImageManager &DevImage) {
assert(Device);
callAndThrow(olCreateProgram, Device, DevImage.getRawData().ImageStart,
DevImage.getSize(), &MProgram);
llvm::StringRef Image = DevImage.getOffloadBinary().getImage();
callAndThrow(olCreateProgram, Device, Image.data(), Image.size(), &MProgram);
}
ProgramWrapper::~ProgramWrapper() {

View File

@@ -17,10 +17,11 @@
#include <sycl/__impl/detail/config.hpp>
#include <detail/device_binary_structures.hpp>
#include <llvm/Object/OffloadBinary.h>
#include <OffloadAPI.h>
#include <memory>
#include <unordered_map>
_LIBSYCL_BEGIN_NAMESPACE_SYCL
@@ -35,8 +36,7 @@ public:
/// provided arguments.
///
/// \param Device is the device to use for program creation.
/// \param DevImage is the device image (wrapped __sycl_tgt_device_image) to
/// use for program creation.
/// \param DevImage is the device image to use for program creation.
/// \throw sycl::exception with sycl::errc::runtime when failed to create the
/// program.
ProgramWrapper(ol_device_handle_t Device, DeviceImageManager &DevImage);
@@ -61,7 +61,8 @@ private:
/// creation.
class DeviceImageManager {
public:
DeviceImageManager(const __sycl_tgt_device_image &Bin) : MBin(&Bin) {}
DeviceImageManager(std::unique_ptr<llvm::object::OffloadBinary> Bin)
: MBin(std::move(Bin)) {}
// Explicitly delete copy constructor/operator= to avoid unintentional copies.
DeviceImageManager(const DeviceImageManager &) = delete;
DeviceImageManager &operator=(const DeviceImageManager &) = delete;
@@ -71,14 +72,8 @@ public:
~DeviceImageManager() = default;
/// \return a reference to the corresponding raw __sycl_tgt_device_image
/// object.
const __sycl_tgt_device_image &getRawData() const { return *get(); }
/// \return the size of the corresponding device image data in bytes.
size_t getSize() const {
return static_cast<size_t>(MBin->ImageEnd - MBin->ImageStart);
}
/// \return a reference to the corresponding parsed OffloadBinary object.
const llvm::object::OffloadBinary &getOffloadBinary() const { return *MBin; }
/// Returns a liboffload program which is compatible with the specified
/// device. Searches among existing programs and creates a new one if no
@@ -92,9 +87,7 @@ public:
protected:
std::unordered_map<ol_device_handle_t, ProgramWrapper> MPrograms;
const __sycl_tgt_device_image *get() const { return MBin; }
__sycl_tgt_device_image const *MBin{};
std::unique_ptr<llvm::object::OffloadBinary> MBin;
};
} // namespace detail

View File

@@ -13,113 +13,96 @@
#include <detail/device_impl.hpp>
#include <detail/offload/offload_utils.hpp>
#include <cstring>
#include <llvm/Frontend/Offloading/Utility.h>
_LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
static inline bool checkFatBinVersion(const __sycl_tgt_bin_desc &FatbinDesc) {
return FatbinDesc.Version == SupportedOffloadBinaryVersion;
}
static inline bool
checkDeviceImageValidity(const __sycl_tgt_device_image &DeviceImage) {
return (DeviceImage.Version == SupportedDeviceBinaryVersion) &&
(DeviceImage.OffloadKind == llvm::object::OFK_SYCL) &&
(DeviceImage.ImageFormat == llvm::object::IMG_SPIRV);
checkDeviceImageValidity(const llvm::object::OffloadBinary &OB) {
return (OB.getOffloadKind() == llvm::object::OFK_SYCL) &&
(OB.getImageKind() == llvm::object::IMG_SPIRV);
}
void ProgramAndKernelManager::registerFatBin(__sycl_tgt_bin_desc *FatbinDesc) {
assert(FatbinDesc && "Device images descriptor can't be nullptr");
void ProgramAndKernelManager::registerFatBin(const void *BinaryStart,
size_t Size) {
assert(BinaryStart && "Binary pointer can't be nullptr");
if (!checkFatBinVersion(*FatbinDesc))
llvm::MemoryBufferRef MBR(
llvm::StringRef(static_cast<const char *>(BinaryStart), Size),
/*Identifier=*/"");
auto BinOrErr = llvm::object::OffloadBinary::create(MBR);
if (!BinOrErr || BinOrErr->empty())
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
"Incompatible version of device images descriptor.");
if (!FatbinDesc->NumDeviceBinaries)
return;
"Failed to parse OffloadBinary");
DeviceImageManagerVec Images;
Images.reserve(BinOrErr->size());
std::lock_guard<std::mutex> Guard(MDataCollectionMutex);
for (uint16_t I = 0; I < FatbinDesc->NumDeviceBinaries; ++I) {
const auto &RawDeviceImage = FatbinDesc->DeviceImages[I];
if (!checkDeviceImageValidity(RawDeviceImage))
for (std::unique_ptr<llvm::object::OffloadBinary> &OB : *BinOrErr) {
if (!checkDeviceImageValidity(*OB))
throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
"Incompatible device image.");
const llvm::offloading::EntryTy *EntriesB = RawDeviceImage.EntriesBegin;
const llvm::offloading::EntryTy *EntriesE = RawDeviceImage.EntriesEnd;
// Ignore "empty" device image.
if (EntriesB == EntriesE)
continue;
llvm::StringRef Symbols = OB->getString("symbols");
std::unique_ptr<DeviceImageManager> NewImageWrapper =
std::make_unique<DeviceImageManager>(RawDeviceImage);
for (auto EntriesIt = EntriesB; EntriesIt != EntriesE; ++EntriesIt) {
auto Name = EntriesIt->SymbolName;
Images.push_back(std::make_unique<DeviceImageManager>(std::move(OB)));
DeviceImageManager &NewImageWrapper = *Images.back();
llvm::offloading::sycl::forEachSymbol(Symbols, [&](llvm::StringRef Name) {
auto It = MDeviceKernelInfoMap.find(std::string_view(Name));
if (It == MDeviceKernelInfoMap.end()) {
[[maybe_unused]] auto [Iterator, EmplaceSucceeded] =
MDeviceKernelInfoMap.emplace(
std::piecewise_construct, std::forward_as_tuple(Name),
std::forward_as_tuple(Name, *NewImageWrapper));
std::piecewise_construct,
std::forward_as_tuple(std::string_view(Name)),
std::forward_as_tuple(std::string_view(Name), NewImageWrapper));
assert(EmplaceSucceeded && "Kernel name found in multiple images");
}
});
}
MDeviceImageManagers.insert(
std::make_pair(&RawDeviceImage, std::move(NewImageWrapper)));
}
[[maybe_unused]] auto [It, Inserted] =
MDeviceImageManagers.emplace(BinaryStart, std::move(Images));
assert(Inserted && "Fat binary registered twice");
}
void ProgramAndKernelManager::unregisterFatBin(
__sycl_tgt_bin_desc *FatbinDesc) {
assert(FatbinDesc && "Device images descriptor can't be nullptr");
if (!checkFatBinVersion(*FatbinDesc) || FatbinDesc->NumDeviceBinaries == 0)
return;
void ProgramAndKernelManager::unregisterFatBin(const void *BinaryStart,
size_t /*Size*/) {
assert(BinaryStart && "Binary pointer can't be nullptr");
std::lock_guard<std::mutex> Guard(MDataCollectionMutex);
for (uint16_t I = 0; I < FatbinDesc->NumDeviceBinaries; ++I) {
const auto &RawDeviceImage = FatbinDesc->DeviceImages[I];
auto It = MDeviceImageManagers.find(BinaryStart);
if (It == MDeviceImageManagers.end())
return;
auto DevImageIt = MDeviceImageManagers.find(&RawDeviceImage);
if (DevImageIt == MDeviceImageManagers.end())
continue;
const llvm::offloading::EntryTy *EntriesB = RawDeviceImage.EntriesBegin;
const llvm::offloading::EntryTy *EntriesE = RawDeviceImage.EntriesEnd;
// Ignore "empty" device image
if (EntriesB == EntriesE)
continue;
for (auto EntriesIt = EntriesB; EntriesIt != EntriesE; ++EntriesIt) {
if (auto KernelIt = MDeviceKernelInfoMap.find(EntriesIt->SymbolName);
for (auto &Image : It->second) {
llvm::StringRef Symbols = Image->getOffloadBinary().getString("symbols");
llvm::offloading::sycl::forEachSymbol(Symbols, [&](llvm::StringRef Name) {
if (auto KernelIt = MDeviceKernelInfoMap.find(std::string_view(Name));
KernelIt != MDeviceKernelInfoMap.end()) {
// Programs are attached to image and will be released with image
// Programs are attached to the image and will be released with image
// destruction. Clear only kernel specific data by destroying its kernel
// info object.
MDeviceKernelInfoMap.erase(KernelIt);
}
});
}
MDeviceImageManagers.erase(DevImageIt);
}
MDeviceImageManagers.erase(It);
}
static bool isImageCompatible(const DeviceImageManager &Image,
const DeviceImpl &Device) {
sycl::backend BE = Device.getBackend();
const char *Target = Image.getRawData().TripleString;
if (!(strcmp(Target, DeviceBinaryTripleSPIRV64) == 0 &&
BE == sycl::backend::level_zero))
const llvm::object::OffloadBinary &OB = Image.getOffloadBinary();
if (!(OB.getTriple() == DeviceBinaryTripleSPIRV64 &&
Device.getBackend() == sycl::backend::level_zero))
return false;
bool IsValid{};
callAndThrow(olIsValidBinary, Device.getOLHandle(),
Image.getRawData().ImageStart, Image.getSize(), &IsValid);
llvm::StringRef ImageBytes = OB.getImage();
callAndThrow(olIsValidBinary, Device.getOLHandle(), ImageBytes.data(),
ImageBytes.size(), &IsValid);
return IsValid;
}
@@ -152,14 +135,14 @@ ProgramAndKernelManager::getOrCreateKernel(DeviceKernelInfo &KernelInfo,
} // namespace detail
_LIBSYCL_END_NAMESPACE_SYCL
extern "C" _LIBSYCL_EXPORT void
__sycl_register_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc) {
extern "C" _LIBSYCL_EXPORT void __sycl_register_lib(const void *BinaryStart,
size_t Size) {
sycl::detail::ProgramAndKernelManager::getInstance().registerFatBin(
FatbinDesc);
BinaryStart, Size);
}
extern "C" _LIBSYCL_EXPORT void
__sycl_unregister_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc) {
extern "C" _LIBSYCL_EXPORT void __sycl_unregister_lib(const void *BinaryStart,
size_t Size) {
sycl::detail::ProgramAndKernelManager::getInstance().unregisterFatBin(
FatbinDesc);
BinaryStart, Size);
}

View File

@@ -21,23 +21,32 @@
#include <detail/device_image_wrapper.hpp>
#include <detail/device_kernel_info.hpp>
#include <llvm/Object/OffloadBinary.h>
#include <OffloadAPI.h>
#include <mutex>
#include <unordered_map>
#include <vector>
// +++ Entry points referenced by the offload wrapper object {
/// Executed as a part of a module's (.exe, .dll) static initialization.
/// Registers device executable images with the runtime.
extern "C" _LIBSYCL_EXPORT void
__sycl_register_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc);
/// \param BinaryStart pointer to the start of the OffloadBinary.
/// \param Size size in bytes of the OffloadBinary.
extern "C" _LIBSYCL_EXPORT void __sycl_register_lib(const void *BinaryStart,
size_t Size);
/// Executed as a part of current module's (.exe, .dll) static
/// de-initialization.
/// Unregisters device executable images with the runtime.
extern "C" _LIBSYCL_EXPORT void
__sycl_unregister_lib(sycl::detail::__sycl_tgt_bin_desc *FatbinDesc);
/// \param BinaryStart pointer to the start of the OffloadBinary.
/// \param Size size in bytes of the OffloadBinary.
/// BinaryStart and Size must match the values passed to the corresponding
/// __sycl_register_lib call.
extern "C" _LIBSYCL_EXPORT void __sycl_unregister_lib(const void *BinaryStart,
size_t Size);
// +++ }
@@ -56,20 +65,16 @@ public:
return PM;
}
/// Parses raw device images data and prepares internal structures for
/// effective kernel/program creation.
/// \param FatbinDesc a record of all the device code that may be offloaded,
/// generated by compiler and offloading tools.
/// \throw sycl::exception with sycl::errc::runtime if a device image
/// descriptor has an incompatible version or if a device image has an
/// incompatible version, target or kind.
void registerFatBin(__sycl_tgt_bin_desc *FatbinDesc);
/// Parses the OffloadBinary of the given Size starting at BinaryStart and
/// prepares internal structures for effective kernel/program creation.
/// \throw sycl::exception with sycl::errc::runtime if parsing fails or if
/// the binary has an incompatible kind or target.
void registerFatBin(const void *BinaryStart, size_t Size);
/// Removes all entries of the data in FatbinDesc from internal structures.
/// \param FatbinDesc a record of all the device code that may be offloaded,
/// generated by compiler and offloading tools. Must match the pointer and
/// data passed to registerFatBin.
void unregisterFatBin(__sycl_tgt_bin_desc *FatbinDesc);
/// Removes all entries associated with the fat binary that was previously
/// passed to registerFatBin. BinaryStart and Size must match the values
/// passed to the corresponding registerFatBin call.
void unregisterFatBin(const void *BinaryStart, size_t Size);
/// Creates a liboffload kernel that is ready for execution.
/// This method is thread-safe.
@@ -92,9 +97,14 @@ private:
// by caching the pointers when possible.
std::unordered_map<std::string_view, DeviceKernelInfo> MDeviceKernelInfoMap;
// Controls lifetime of device images.
std::unordered_map<const __sycl_tgt_device_image *,
std::unique_ptr<DeviceImageManager>>
// Keyed by BinaryStart (register/unregister param). Each fat binary can
// contain multiple device images, each owned by its own DeviceImageManager.
// Controls lifetime of device image managers and, through them, parsed
// OffloadBinary objects.
using BinaryStartKey = const void *;
using DeviceImageManagerVec =
std::vector<std::unique_ptr<DeviceImageManager>>;
std::unordered_map<BinaryStartKey, DeviceImageManagerVec>
MDeviceImageManagers;
// All work with device images and data related to it must be wrapped with a

View File

@@ -13,6 +13,8 @@
#include <cstdint>
#include <memory>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Module.h"
@@ -176,6 +178,43 @@ LLVM_ABI Error containerizeImage(std::unique_ptr<MemoryBuffer> &Binary,
int32_t ImageFlags,
MapVector<StringRef, StringRef> &MetaData);
namespace sycl {
/// Serialized symbol table stored in the "symbols" entry of a SYCL
/// OffloadBinary. The in-memory layout of the blob is:
/// [ SymbolTableHeader ]
/// [ SymbolTableEntry Entries[N] ] -- N == Header.Count
/// [ char StringData[] ] -- packed null-terminated names
/// Use writeSymbolTable() to produce the blob and forEachSymbol() to consume
/// it; both encapsulate all pointer arithmetic.
struct SymbolTableHeader {
uint32_t Count; ///< Number of symbol entries.
};
struct SymbolTableEntry {
uint32_t OffsetToSymbol; ///< Byte offset from blob start to the symbol name.
uint32_t SymbolSize; ///< Length of the symbol name in bytes, excluding
///< the null terminator.
};
/// Serialize \p Names into \p Out.
LLVM_ABI void writeSymbolTable(ArrayRef<StringRef> Names, SmallString<0> &Out);
/// Invoke \p Callback with a \c StringRef for each symbol in \p Symbols,
/// the raw serialized symbol-table blob.
template <typename Fn> void forEachSymbol(StringRef Symbols, Fn &&Callback) {
assert(Symbols.size() >= sizeof(SymbolTableHeader) &&
"symbols blob smaller than header");
const char *Base = Symbols.data();
const auto &Header = *reinterpret_cast<const SymbolTableHeader *>(Base);
const auto *Entries = reinterpret_cast<const SymbolTableEntry *>(&Header + 1);
for (uint32_t I = 0; I < Header.Count; ++I)
Callback(
StringRef(Base + Entries[I].OffsetToSymbol, Entries[I].SymbolSize));
}
} // namespace sycl
namespace intel {
/// Containerizes an OpenMP SPIR-V image into an OffloadBinary image.
/// \param Binary The SPIR-V binary to containerize.

View File

@@ -91,9 +91,9 @@ PointerType *getBinDescPtrTy(Module &M) {
/// library. It is defined as follows
///
/// __attribute__((visibility("hidden")))
/// extern __tgt_offload_entry *__start_omp_offloading_entries;
/// extern __tgt_offload_entry *__start_llvm_offload_entries;
/// __attribute__((visibility("hidden")))
/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
/// extern __tgt_offload_entry *__stop_llvm_offload_entries;
///
/// static const char Image0[] = { <Bufs.front() contents> };
/// ...
@@ -103,23 +103,23 @@ PointerType *getBinDescPtrTy(Module &M) {
/// {
/// Image0, /*ImageStart*/
/// Image0 + sizeof(Image0), /*ImageEnd*/
/// __start_omp_offloading_entries, /*EntriesBegin*/
/// __stop_omp_offloading_entries /*EntriesEnd*/
/// __start_llvm_offload_entries, /*EntriesBegin*/
/// __stop_llvm_offload_entries /*EntriesEnd*/
/// },
/// ...
/// {
/// ImageN, /*ImageStart*/
/// ImageN + sizeof(ImageN), /*ImageEnd*/
/// __start_omp_offloading_entries, /*EntriesBegin*/
/// __stop_omp_offloading_entries /*EntriesEnd*/
/// __start_llvm_offload_entries, /*EntriesBegin*/
/// __stop_llvm_offload_entries /*EntriesEnd*/
/// }
/// };
///
/// static const __tgt_bin_desc BinDesc = {
/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
/// Images, /*DeviceImages*/
/// __start_omp_offloading_entries, /*HostEntriesBegin*/
/// __stop_omp_offloading_entries /*HostEntriesEnd*/
/// __start_llvm_offload_entries, /*HostEntriesBegin*/
/// __stop_llvm_offload_entries /*HostEntriesEnd*/
/// };
///
/// Global variable that represents BinDesc is returned.
@@ -629,384 +629,75 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
class SYCLWrapper {
public:
SYCLWrapper(Module &M, const SYCLJITOptions &Options)
: M(M), C(M.getContext()), Options(Options) {
EntryTy = offloading::getEntryTy(M);
SyclDeviceImageTy = getSyclDeviceImageTy();
SyclBinDescTy = getSyclBinDescTy();
: M(M), C(M.getContext()), Options(Options) {}
/// Embeds \p Buffer (a raw OffloadBinary) as a global constant and returns
/// a pair of (Start, Size), where Start points to the beginning of the
/// embedded data and Size is its length in bytes.
std::pair<Constant *, Constant *> embedBinary(ArrayRef<char> Buffer) {
Constant *Arr = ConstantDataArray::get(C, Buffer);
GlobalVariable *BinaryGV = new GlobalVariable(
M, Arr->getType(), /*isConstant=*/true, GlobalValue::InternalLinkage,
Arr, ".sycl_offloading.binary");
BinaryGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
BinaryGV->setSection(".llvm.offloading");
IntegerType *Int64Ty = Type::getInt64Ty(C);
Constant *Zero = ConstantInt::get(Int64Ty, 0);
Constant *Size = ConstantInt::get(Int64Ty, Buffer.size());
Constant *Start = ConstantExpr::getGetElementPtr(
BinaryGV->getValueType(), BinaryGV, ArrayRef<Constant *>{Zero, Zero});
return {Start, Size};
}
/// Creates binary descriptor for the given device images. Binary descriptor
/// is an object that is passed to the offloading runtime at program startup
/// and it describes all device images available in the executable or shared
/// library. It is defined as follows:
///
/// \code
/// __attribute__((visibility("hidden")))
/// __tgt_offload_entry *__sycl_offload_entries_arr0[];
/// ...
/// __attribute__((visibility("hidden")))
/// __tgt_offload_entry *__sycl_offload_entries_arrN[];
///
/// __attribute__((visibility("hidden")))
/// extern const char *CompileOptions = "...";
/// ...
/// __attribute__((visibility("hidden")))
/// extern const char *LinkOptions = "...";
/// ...
///
/// static const char Image0[] = { ... };
/// ...
/// static const char ImageN[] = { ... };
///
/// static const __sycl.tgt_device_image Images[] = {
/// {
/// Version, // Version
/// OffloadKind, // OffloadKind
/// Format, // Format of the image.
// TripleString, // Arch
/// CompileOptions, // CompileOptions
/// LinkOptions, // LinkOptions
/// Image0, // ImageStart
/// Image0 + IMAGE0_SIZE, // ImageEnd
/// __sycl_offload_entries_arr0, // EntriesBegin
/// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
/// NULL, // PropertiesBegin
/// NULL, // PropertiesEnd
/// },
/// ...
/// };
///
/// static const __sycl.tgt_bin_desc FatbinDesc = {
/// Version, //Version
/// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
/// Images, //DeviceImages
/// NULL, //HostEntriesBegin
/// NULL //HostEntriesEnd
/// };
/// \endcode
///
/// \returns Global variable that represents FatbinDesc.
GlobalVariable *createFatbinDesc(ArrayRef<OffloadFile> OffloadFiles) {
StringRef OffloadKindTag = ".sycl_offloading.";
SmallVector<Constant *> WrappedImages;
WrappedImages.reserve(OffloadFiles.size());
for (size_t I = 0, E = OffloadFiles.size(); I != E; ++I)
WrappedImages.push_back(
wrapImage(*OffloadFiles[I].getBinary(), Twine(I), OffloadKindTag));
return combineWrappedImages(WrappedImages, OffloadKindTag);
}
void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
void createRegisterFatbinFunction(Constant *Start, Constant *Size) {
FunctionType *FuncTy =
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
Twine("sycl") + ".descriptor_reg", &M);
Func->setSection(".text.startup");
// Get RegFuncName function declaration.
PointerType *PtrTy = PointerType::getUnqual(C);
IntegerType *Int64Ty = Type::getInt64Ty(C);
FunctionType *RegFuncTy =
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
FunctionType::get(Type::getVoidTy(C), {PtrTy, Int64Ty},
/*isVarArg=*/false);
FunctionCallee RegFuncC =
M.getOrInsertFunction("__sycl_register_lib", RegFuncTy);
// Construct function body.
IRBuilder Builder(BasicBlock::Create(C, "entry", Func));
Builder.CreateCall(RegFuncC, FatbinDesc);
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
Builder.CreateCall(RegFuncC, {Start, Size});
Builder.CreateRetVoid();
// Add this function to constructors.
appendToGlobalCtors(M, Func, /*Priority*/ 1);
}
void createUnregisterFunction(GlobalVariable *FatbinDesc) {
void createUnregisterFunction(Constant *Start, Constant *Size) {
FunctionType *FuncTy =
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
Function *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg", &M);
Func->setSection(".text.startup");
// Get UnregFuncName function declaration.
PointerType *PtrTy = PointerType::getUnqual(C);
IntegerType *Int64Ty = Type::getInt64Ty(C);
FunctionType *UnRegFuncTy =
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
FunctionType::get(Type::getVoidTy(C), {PtrTy, Int64Ty},
/*isVarArg=*/false);
FunctionCallee UnRegFuncC =
M.getOrInsertFunction("__sycl_unregister_lib", UnRegFuncTy);
// Construct function body
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
Builder.CreateCall(UnRegFuncC, FatbinDesc);
Builder.CreateCall(UnRegFuncC, {Start, Size});
Builder.CreateRetVoid();
// Add this function to global destructors.
appendToGlobalDtors(M, Func, /*Priority*/ 1);
}
private:
IntegerType *getSizeTTy() {
switch (M.getDataLayout().getPointerSize()) {
case 4:
return Type::getInt32Ty(C);
case 8:
return Type::getInt64Ty(C);
}
llvm_unreachable("unsupported pointer type size");
}
SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
IntegerType *SizeTTy = getSizeTTy();
return SmallVector<Constant *, 2>{ConstantInt::get(SizeTTy, First),
ConstantInt::get(SizeTTy, Second)};
}
/// Note: Properties aren't supported and the support is going
/// to be added later.
/// Creates a structure corresponding to:
/// SYCL specific image descriptor type.
/// \code
/// struct __sycl.tgt_device_image {
/// // Version of this structure - for backward compatibility;
/// // all modifications which change order/type/offsets of existing fields
/// // should increment the version.
/// uint16_t Version;
/// // The kind of offload model the image employs.
/// uint8_t OffloadKind;
/// // Format of the image data - SPIRV, LLVMIR bitcode, etc.
/// uint8_t Format;
/// // Null-terminated string representation of the device's target
/// // architecture.
/// const char *Arch;
/// // A null-terminated string; target- and compiler-specific options
/// // which are passed to the device compiler at runtime.
/// const char *CompileOptions;
/// // A null-terminated string; target- and compiler-specific options
/// // which are passed to the device linker at runtime.
/// const char *LinkOptions;
/// // Pointer to the device binary image start.
/// void *ImageStart;
/// // Pointer to the device binary image end.
/// void *ImageEnd;
/// // The entry table.
/// __tgt_offload_entry *EntriesBegin;
/// __tgt_offload_entry *EntriesEnd;
/// const char *PropertiesBegin;
/// const char *PropertiesEnd;
/// };
/// \endcode
StructType *getSyclDeviceImageTy() {
return StructType::create(
{
Type::getInt16Ty(C), // Version
Type::getInt8Ty(C), // OffloadKind
Type::getInt8Ty(C), // Format
PointerType::getUnqual(C), // Arch
PointerType::getUnqual(C), // CompileOptions
PointerType::getUnqual(C), // LinkOptions
PointerType::getUnqual(C), // ImageStart
PointerType::getUnqual(C), // ImageEnd
PointerType::getUnqual(C), // EntriesBegin
PointerType::getUnqual(C), // EntriesEnd
PointerType::getUnqual(C), // PropertiesBegin
PointerType::getUnqual(C) // PropertiesEnd
},
"__sycl.tgt_device_image");
}
/// Creates a structure for SYCL specific binary descriptor type. Corresponds
/// to:
///
/// \code
/// struct __sycl.tgt_bin_desc {
/// // version of this structure - for backward compatibility;
/// // all modifications which change order/type/offsets of existing fields
/// // should increment the version.
/// uint16_t Version;
/// uint16_t NumDeviceImages;
/// __sycl.tgt_device_image *DeviceImages;
/// // the offload entry table
/// __tgt_offload_entry *HostEntriesBegin;
/// __tgt_offload_entry *HostEntriesEnd;
/// };
/// \endcode
StructType *getSyclBinDescTy() {
return StructType::create(
{Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
PointerType::getUnqual(C), PointerType::getUnqual(C)},
"__sycl.tgt_bin_desc");
}
/// Adds a global readonly variable that is initialized by given
/// \p Initializer to the module.
GlobalVariable *addGlobalArrayVariable(const Twine &Name,
ArrayRef<char> Initializer,
const Twine &Section = "") {
Constant *Arr = ConstantDataArray::get(M.getContext(), Initializer);
GlobalVariable *Var =
new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
GlobalVariable::InternalLinkage, Arr, Name);
Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
SmallVector<char, 32> NameBuf;
StringRef SectionName = Section.toStringRef(NameBuf);
if (!SectionName.empty())
Var->setSection(SectionName);
return Var;
}
/// Adds given \p Buf as a global variable into the module.
/// \returns Pair of pointers that point at the beginning and the end of the
/// variable.
std::pair<Constant *, Constant *>
addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
const Twine &Section = "") {
GlobalVariable *Var = addGlobalArrayVariable(Name, Buf, Section);
Constant *ImageB = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
getSizetConstPair(0, 0));
Constant *ImageE = ConstantExpr::getGetElementPtr(
Var->getValueType(), Var, getSizetConstPair(0, Buf.size()));
return std::make_pair(ImageB, ImageE);
}
/// Adds given \p Data as constant byte array in the module.
/// \returns Constant pointer to the added data. The pointer type does not
/// carry size information.
Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
GlobalVariable *Var = addGlobalArrayVariable(Name, Data);
Constant *DataPtr = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
getSizetConstPair(0, 0));
return DataPtr;
}
/// Creates a global variable of const char* type and creates an
/// initializer that initializes it with \p Str.
///
/// \returns Link-time constant pointer (constant expr) to that
/// variable.
Constant *addStringToModule(StringRef Str, const Twine &Name) {
Constant *Arr = ConstantDataArray::getString(C, Str);
GlobalVariable *Var =
new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
GlobalVariable::InternalLinkage, Arr, Name);
Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
return Var;
}
/// Each image contains its own set of symbols, which may contain different
/// symbols than other images. This function constructs an array of
/// symbol entries for a particular image.
///
/// \returns Pointers to the beginning and end of the array.
std::pair<Constant *, Constant *>
initOffloadEntriesPerImage(StringRef Entries, const Twine &OffloadKindTag) {
SmallVector<Constant *> EntriesInits;
const char *Current = Entries.data();
const char *End = Current + Entries.size();
while (Current < End) {
StringRef Name(Current);
Current += Name.size() + 1;
if (Name.empty())
continue;
GlobalVariable *GV = emitOffloadingEntry(
M, /*Kind*/ OffloadKind::OFK_SYCL,
Constant::getNullValue(PointerType::getUnqual(C)), Name, /*Size*/ 0,
/*Flags*/ 0, /*Data*/ 0);
EntriesInits.push_back(GV->getInitializer());
}
Constant *Arr = ConstantArray::get(
ArrayType::get(EntryTy, EntriesInits.size()), EntriesInits);
GlobalVariable *EntriesGV = new GlobalVariable(
M, Arr->getType(), /*isConstant*/ true, GlobalVariable::InternalLinkage,
Arr, OffloadKindTag + "entries_arr");
Constant *EntriesB = ConstantExpr::getGetElementPtr(
EntriesGV->getValueType(), EntriesGV, getSizetConstPair(0, 0));
Constant *EntriesE = ConstantExpr::getGetElementPtr(
EntriesGV->getValueType(), EntriesGV,
getSizetConstPair(0, EntriesInits.size()));
return std::make_pair(EntriesB, EntriesE);
}
Constant *wrapImage(const OffloadBinary &OB, const Twine &ImageID,
StringRef OffloadKindTag) {
// Note: Intel DPC++ compiler had 2 versions of this structure
// and clang++ has a third different structure. To avoid ABI incompatibility
// between generated device images the Version here starts from 3.
constexpr uint16_t DeviceImageStructVersion = 3;
Constant *Version =
ConstantInt::get(Type::getInt16Ty(C), DeviceImageStructVersion);
Constant *OffloadKindConstant = ConstantInt::get(
Type::getInt8Ty(C), static_cast<uint8_t>(OB.getOffloadKind()));
Constant *ImageKindConstant = ConstantInt::get(
Type::getInt8Ty(C), static_cast<uint8_t>(OB.getImageKind()));
StringRef Triple = OB.getString("triple");
Constant *TripleConstant =
addStringToModule(Triple, Twine(OffloadKindTag) + "target." + ImageID);
Constant *CompileOptions =
addStringToModule(Options.CompileOptions,
Twine(OffloadKindTag) + "opts.compile." + ImageID);
Constant *LinkOptions = addStringToModule(
Options.LinkOptions, Twine(OffloadKindTag) + "opts.link." + ImageID);
// Note: NULL for now.
std::pair<Constant *, Constant *> PropertiesConstants = {
Constant::getNullValue(PointerType::getUnqual(C)),
Constant::getNullValue(PointerType::getUnqual(C))};
StringRef RawImage = OB.getImage();
std::pair<Constant *, Constant *> Binary = addArrayToModule(
ArrayRef<char>(RawImage.begin(), RawImage.end()),
Twine(OffloadKindTag) + ImageID + ".data", ".llvm.offloading");
// For SYCL images offload entries are defined here per image.
std::pair<Constant *, Constant *> ImageEntriesPtrs =
initOffloadEntriesPerImage(OB.getString("symbols"), OffloadKindTag);
// .first and .second arguments below correspond to start and end pointers
// respectively.
Constant *WrappedBinary = ConstantStruct::get(
SyclDeviceImageTy, Version, OffloadKindConstant, ImageKindConstant,
TripleConstant, CompileOptions, LinkOptions, Binary.first,
Binary.second, ImageEntriesPtrs.first, ImageEntriesPtrs.second,
PropertiesConstants.first, PropertiesConstants.second);
return WrappedBinary;
}
GlobalVariable *combineWrappedImages(ArrayRef<Constant *> WrappedImages,
StringRef OffloadKindTag) {
Constant *ImagesData = ConstantArray::get(
ArrayType::get(SyclDeviceImageTy, WrappedImages.size()), WrappedImages);
GlobalVariable *ImagesGV =
new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
GlobalValue::InternalLinkage, ImagesData,
Twine(OffloadKindTag) + "device_images");
ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
Constant *EntriesB = Constant::getNullValue(PointerType::getUnqual(C));
Constant *EntriesE = Constant::getNullValue(PointerType::getUnqual(C));
static constexpr uint16_t BinDescStructVersion = 1;
Constant *DescInit = ConstantStruct::get(
SyclBinDescTy,
ConstantInt::get(Type::getInt16Ty(C), BinDescStructVersion),
ConstantInt::get(Type::getInt16Ty(C), WrappedImages.size()), ImagesGV,
EntriesB, EntriesE);
return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
GlobalValue::InternalLinkage, DescInit,
Twine(OffloadKindTag) + "descriptor");
}
Module &M;
LLVMContext &C;
SYCLJITOptions Options;
StructType *EntryTy = nullptr;
StructType *SyclDeviceImageTy = nullptr;
StructType *SyclBinDescTy = nullptr;
}; // end of SYCLWrapper
} // namespace
@@ -1053,18 +744,8 @@ Error offloading::wrapHIPBinary(Module &M, ArrayRef<char> Image,
Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M, ArrayRef<char> Buffer,
SYCLJITOptions Options) {
SYCLWrapper W(M, Options);
MemoryBufferRef MBR(StringRef(Buffer.begin(), Buffer.size()),
/*Identifier*/ "");
SmallVector<OffloadFile> OffloadFiles;
if (Error E = extractOffloadBinaries(MBR, OffloadFiles))
return E;
GlobalVariable *Desc = W.createFatbinDesc(OffloadFiles);
if (!Desc)
return createStringError(inconvertibleErrorCode(),
"No binary descriptors created.");
W.createRegisterFatbinFunction(Desc);
W.createUnregisterFunction(Desc);
auto [Start, Size] = W.embedBinary(Buffer);
W.createRegisterFatbinFunction(Start, Size);
W.createUnregisterFunction(Start, Size);
return Error::success();
}

View File

@@ -23,6 +23,7 @@
using namespace llvm;
using namespace llvm::offloading;
using namespace llvm::offloading::sycl;
StructType *offloading::getEntryTy(Module &M) {
LLVMContext &C = M.getContext();
@@ -424,3 +425,31 @@ Error offloading::intel::containerizeOpenMPSPIRVImage(
object::OffloadKind::OFK_OpenMP, /*ImageFlags=*/0,
MetaData);
}
void sycl::writeSymbolTable(ArrayRef<StringRef> Names, SmallString<0> &Out) {
uint32_t Count = Names.size();
// Compute the byte offset where string data begins: right after the header
// and the entry array.
uint32_t StringDataOffset =
sizeof(SymbolTableHeader) + Count * sizeof(SymbolTableEntry);
// Pre-size the output to hold the header and entry array; string data is
// appended below.
Out.resize(StringDataOffset);
// Write the header.
auto *Header = reinterpret_cast<SymbolTableHeader *>(Out.data());
Header->Count = Count;
// Write each entry and append the corresponding null-terminated name.
auto *Entries = reinterpret_cast<SymbolTableEntry *>(Header + 1);
uint32_t CurrentOffset = StringDataOffset;
for (uint32_t I = 0; I < Count; ++I) {
Entries[I].OffsetToSymbol = CurrentOffset;
Entries[I].SymbolSize = Names[I].size();
Out.append(Names[I]);
Out.push_back('\0');
CurrentOffset += Names[I].size() + 1;
}
}