Summary: The return value was not zeroed. The zeroing was accidentally dropped when we did the port, and because the value is zero "almost always", I didn't notice. Hopefully this makes the test suite no longer flaky.
305 lines
10 KiB
C++
305 lines
10 KiB
C++
//===-- Main entry into the loader interface ------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This utility is used to launch standard programs onto the GPU in conjunction
|
|
// with the LLVM 'libc' project. It is designed to mimic a standard emulator
|
|
// workflow, allowing for unit tests to be run on the GPU directly.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm-gpu-loader.h"
|
|
|
|
#include "llvm/BinaryFormat/Magic.h"
|
|
#include "llvm/Object/ELF.h"
|
|
#include "llvm/Object/ELFObjectFile.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Error.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/Signals.h"
|
|
#include "llvm/Support/WithColor.h"
|
|
#include "llvm/TargetParser/Triple.h"
|
|
|
|
#include <cerrno>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <string>
|
|
|
|
using namespace llvm;
|
|
|
|
// Category that groups every option owned by this tool; main() hides all
// options that are not part of it.
static cl::OptionCategory LoaderCategory("loader options");

// Hidden '-h' flag; main() prints the help message and exits when it is set.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden,
                          cl::cat(LoaderCategory));

// Threads per block in each dimension for the '_start' kernel launch. Each
// defaults to one.
static cl::opt<unsigned>
    ThreadsX("threads-x", cl::desc("Number of threads in the 'x' dimension"),
             cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
    ThreadsY("threads-y", cl::desc("Number of threads in the 'y' dimension"),
             cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
    ThreadsZ("threads-z", cl::desc("Number of threads in the 'z' dimension"),
             cl::init(1), cl::cat(LoaderCategory));
// Named 'Threads' to match the 'Blocks' alias below and the capitalized naming
// used by every other option in this file.
static cl::alias Threads("threads", cl::aliasopt(ThreadsX),
                         cl::desc("Alias for --threads-x"),
                         cl::cat(LoaderCategory));

// Number of blocks in each dimension for the '_start' kernel launch. Each
// defaults to one.
static cl::opt<unsigned>
    BlocksX("blocks-x", cl::desc("Number of blocks in the 'x' dimension"),
            cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
    BlocksY("blocks-y", cl::desc("Number of blocks in the 'y' dimension"),
            cl::init(1), cl::cat(LoaderCategory));
static cl::opt<unsigned>
    BlocksZ("blocks-z", cl::desc("Number of blocks in the 'z' dimension"),
            cl::init(1), cl::cat(LoaderCategory));
static cl::alias Blocks("blocks", cl::aliasopt(BlocksX),
                        cl::desc("Alias for --blocks-x"),
                        cl::cat(LoaderCategory));

// The GPU executable to load (required positional argument).
static cl::opt<std::string> File(cl::Positional, cl::Required,
                                 cl::desc("<gpu executable>"),
                                 cl::cat(LoaderCategory));
// Everything after the executable is forwarded to the GPU program as its
// argument vector.
static cl::list<std::string> Args(cl::ConsumeAfter,
                                  cl::desc("<program arguments>..."),
                                  cl::cat(LoaderCategory));
|
|
|
|
// Argument block handed to the '_begin' kernel: the argument count followed by
// the argument and environment vectors prepared by the loader.
struct BeginArgs {
  int Argc;   // Number of entries in Argv.
  void *Argv; // Null-terminated argument vector.
  void *Envp; // Null-terminated environment vector.
};
|
|
|
|
// Argument block handed to the '_start' kernel. It carries the same leading
// fields as the '_begin' arguments plus a pointer to the return-value slot.
struct StartArgs {
  int Argc;   // Number of entries in Argv.
  void *Argv; // Null-terminated argument vector.
  void *Envp; // Null-terminated environment vector.
  void *Ret;  // Storage the loader reads the program's exit code from.
};
|
|
|
|
// Argument block for the '_end' kernel. It is intentionally empty: that kernel
// takes no arguments.
struct EndArgs {};
|
|
|
|
/// Report an llvm::Error on stderr with a "loader" error prefix and terminate
/// the process with a failure exit code. Never returns.
[[noreturn]] static void handleError(Error E) {
  // Push out anything still buffered on stdout so it is not emitted after the
  // diagnostic below.
  outs().flush();

  raw_ostream &Diagnostics = WithColor::error(errs(), "loader");
  logAllUnhandledErrors(std::move(E), Diagnostics);

  std::exit(EXIT_FAILURE);
}
|
|
|
|
/// Report a failed offload API call and terminate the process with a failure
/// exit code. Never returns.
///
/// \param Err  The non-null result returned by the failing call; its Details
///             string is printed.
/// \param Line The source line of the failing call (see OFFLOAD_ERR).
[[noreturn]] static void handleError(ol_result_t Err, unsigned Line) {
  // Match the llvm::Error overload: flush pending stdout output first so it is
  // not emitted after the diagnostic.
  outs().flush();

  // Passing a null pointer to '%s' is undefined, so substitute a placeholder.
  const char *Details = Err->Details ? Err->Details : "(no details)";
  // 'Line' is unsigned, so it must be printed with '%u' rather than '%d'.
  fprintf(stderr, "%s:%u %s\n", __FILE__, Line, Details);
  exit(EXIT_FAILURE);
}
|
|
|
|
// Evaluate the offload API call X exactly once and abort through handleError()
// with the current line number if it returned a non-null result. The body is
// wrapped in do/while so the macro expands to a single statement that needs
// the caller's trailing ';' and cannot capture a following 'else'.
#define OFFLOAD_ERR(X)                                                         \
  do {                                                                         \
    if (ol_result_t Err = (X))                                                 \
      handleError(Err, __LINE__);                                              \
  } while (false)
|
|
|
|
static void *copyArgumentVector(int Argc, const char **Argv,
|
|
ol_device_handle_t Device) {
|
|
size_t ArgSize = sizeof(char *) * (Argc + 1);
|
|
size_t StringLen = 0;
|
|
for (int i = 0; i < Argc; ++i)
|
|
StringLen += strlen(Argv[i]) + 1;
|
|
|
|
// We allocate enough space for a null terminated array and all the strings.
|
|
void *DevArgv;
|
|
OFFLOAD_ERR(
|
|
olMemAlloc(Device, OL_ALLOC_TYPE_HOST, ArgSize + StringLen, &DevArgv));
|
|
if (!DevArgv)
|
|
handleError(
|
|
createStringError("Failed to allocate memory for environment."));
|
|
|
|
// Store the strings linerally in the same memory buffer.
|
|
void *DevString = reinterpret_cast<uint8_t *>(DevArgv) + ArgSize;
|
|
for (int i = 0; i < Argc; ++i) {
|
|
size_t size = strlen(Argv[i]) + 1;
|
|
std::memcpy(DevString, Argv[i], size);
|
|
static_cast<void **>(DevArgv)[i] = DevString;
|
|
DevString = reinterpret_cast<uint8_t *>(DevString) + size;
|
|
}
|
|
|
|
// Ensure the vector is null terminated.
|
|
reinterpret_cast<void **>(DevArgv)[Argc] = nullptr;
|
|
return DevArgv;
|
|
}
|
|
|
|
void *copyEnvironment(const char **Envp, ol_device_handle_t Device) {
|
|
int Envc = 0;
|
|
for (const char **Env = Envp; *Env != 0; ++Env)
|
|
++Envc;
|
|
|
|
return copyArgumentVector(Envc, Envp, Device);
|
|
}
|
|
|
|
/// Search the devices reported by olIterateDevices for the first one that
/// olIsValidBinary accepts for the given image.
///
/// \param Binary The image to test each device against.
/// \returns The first accepting device, or nullptr if none accepted it.
ol_device_handle_t findDevice(MemoryBufferRef Binary) {
  // State shared with the iteration callback through its user-data pointer.
  struct SearchState {
    ol_device_handle_t Match;
    MemoryBufferRef Image;
  };
  SearchState State{nullptr, Binary};

  OFFLOAD_ERR(olIterateDevices(
      [](ol_device_handle_t Candidate, void *UserData) {
        auto *Search = static_cast<SearchState *>(UserData);

        bool Compatible = false;
        OFFLOAD_ERR(olIsValidBinary(Candidate, Search->Image.getBufferStart(),
                                    Search->Image.getBufferSize(),
                                    &Compatible));
        if (Compatible) {
          // Record the match and return false to stop iterating.
          Search->Match = Candidate;
          return false;
        }

        // Not a match; return true to move on to the next device.
        return true;
      },
      &State));

  return State.Match;
}
|
|
|
|
/// Find the device whose platform reports the host backend.
///
/// \returns The host device, or nullptr if no device reported
///          OL_PLATFORM_BACKEND_HOST.
ol_device_handle_t getHostDevice() {
  // Start from null so a search that finds nothing yields a well-defined
  // result that the caller can test, rather than an indeterminate handle.
  ol_device_handle_t Device = nullptr;
  OFFLOAD_ERR(olIterateDevices(
      [](ol_device_handle_t Device, void *UserData) {
        // Check both queries: on failure the output variables would otherwise
        // be read while still uninitialized.
        ol_platform_handle_t Platform;
        OFFLOAD_ERR(olGetDeviceInfo(Device, OL_DEVICE_INFO_PLATFORM,
                                    sizeof(Platform), &Platform));
        ol_platform_backend_t Backend;
        OFFLOAD_ERR(olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND,
                                      sizeof(Backend), &Backend));

        auto &Output = *static_cast<ol_device_handle_t *>(UserData);
        if (Backend == OL_PLATFORM_BACKEND_HOST) {
          // Record the host device and return false to stop iterating.
          Output = Device;
          return false;
        }
        return true;
      },
      &Device));
  return Device;
}
|
|
|
|
template <typename Args>
|
|
void launchKernel(ol_queue_handle_t Queue, ol_device_handle_t Device,
|
|
ol_program_handle_t Program, const char *Name,
|
|
ol_kernel_launch_size_args_t LaunchArgs, Args &KernelArgs) {
|
|
ol_symbol_handle_t Kernel;
|
|
OFFLOAD_ERR(olGetSymbol(Program, Name, OL_SYMBOL_KIND_KERNEL, &Kernel));
|
|
|
|
OFFLOAD_ERR(olLaunchKernel(Queue, Device, Kernel, &KernelArgs,
|
|
std::is_empty_v<Args> ? 0 : sizeof(Args),
|
|
&LaunchArgs));
|
|
}
|
|
|
|
int main(int argc, const char **argv, const char **envp) {
|
|
sys::PrintStackTraceOnErrorSignal(argv[0]);
|
|
cl::HideUnrelatedOptions(LoaderCategory);
|
|
cl::ParseCommandLineOptions(
|
|
argc, argv,
|
|
"A utility used to launch unit tests built for a GPU target. This is\n"
|
|
"intended to provide an interface similar to cross-compiling "
|
|
"emulators\n");
|
|
|
|
if (Help) {
|
|
cl::PrintHelpMessage();
|
|
return EXIT_SUCCESS;
|
|
}
|
|
|
|
if (Error Err = loadLLVMOffload())
|
|
handleError(std::move(Err));
|
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> ImageOrErr =
|
|
MemoryBuffer::getFileOrSTDIN(File);
|
|
if (std::error_code EC = ImageOrErr.getError())
|
|
handleError(errorCodeToError(EC));
|
|
MemoryBufferRef Image = **ImageOrErr;
|
|
|
|
ol_platform_backend_t Backend;
|
|
ol_init_args_t InitArgs = OL_INIT_ARGS_INIT;
|
|
|
|
file_magic Magic = identify_magic(Image.getBuffer());
|
|
if (Magic >= file_magic::elf && Magic <= file_magic::elf_core) {
|
|
Expected<object::ELFFile<object::ELF64LE>> ElfOrErr =
|
|
object::ELFFile<object::ELF64LE>::create(Image.getBuffer());
|
|
if (!ElfOrErr)
|
|
handleError(ElfOrErr.takeError());
|
|
|
|
switch (ElfOrErr->getHeader().e_machine) {
|
|
case ELF::EM_AMDGPU:
|
|
Backend = OL_PLATFORM_BACKEND_AMDGPU;
|
|
break;
|
|
case ELF::EM_CUDA:
|
|
Backend = OL_PLATFORM_BACKEND_CUDA;
|
|
break;
|
|
default:
|
|
handleError(createStringError(
|
|
"unhandled ELF architecture: %s",
|
|
ELF::convertEMachineToArchName(ElfOrErr->getHeader().e_machine)
|
|
.data()));
|
|
}
|
|
InitArgs.NumPlatforms = 1;
|
|
InitArgs.Platforms = &Backend;
|
|
}
|
|
|
|
SmallVector<const char *> NewArgv = {File.c_str()};
|
|
llvm::transform(Args, std::back_inserter(NewArgv),
|
|
[](const std::string &Arg) { return Arg.c_str(); });
|
|
|
|
OFFLOAD_ERR(olInit(&InitArgs));
|
|
ol_device_handle_t Device = findDevice(Image);
|
|
if (!Device)
|
|
handleError(createStringError("No compatible device was found"));
|
|
ol_device_handle_t Host = getHostDevice();
|
|
assert(Host && "Host device should always be present");
|
|
|
|
ol_program_handle_t Program;
|
|
OFFLOAD_ERR(olCreateProgram(Device, Image.getBufferStart(),
|
|
Image.getBufferSize(), &Program));
|
|
|
|
ol_queue_handle_t Queue;
|
|
OFFLOAD_ERR(olCreateQueue(Device, &Queue));
|
|
|
|
int DevArgc = static_cast<int>(NewArgv.size());
|
|
void *DevArgv = copyArgumentVector(NewArgv.size(), NewArgv.begin(), Device);
|
|
void *DevEnvp = copyEnvironment(envp, Device);
|
|
|
|
void *DevRet;
|
|
int Zero = 0;
|
|
OFFLOAD_ERR(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, sizeof(int), &DevRet));
|
|
OFFLOAD_ERR(olMemcpy(Queue, DevRet, Device, &Zero, Host, sizeof(int)));
|
|
|
|
ol_kernel_launch_size_args_t BeginLaunch{1, {1, 1, 1}, {1, 1, 1}, 0};
|
|
BeginArgs BeginArgs = {DevArgc, DevArgv, DevEnvp};
|
|
launchKernel(Queue, Device, Program, "_begin", BeginLaunch, BeginArgs);
|
|
OFFLOAD_ERR(olSyncQueue(Queue));
|
|
|
|
uint32_t Dims = (BlocksZ > 1) ? 3 : (BlocksY > 1) ? 2 : 1;
|
|
ol_kernel_launch_size_args_t StartLaunch{Dims,
|
|
{BlocksX, BlocksY, BlocksZ},
|
|
{ThreadsX, ThreadsY, ThreadsZ},
|
|
/*SharedMemBytes=*/0};
|
|
StartArgs StartArgs = {DevArgc, DevArgv, DevEnvp, DevRet};
|
|
launchKernel(Queue, Device, Program, "_start", StartLaunch, StartArgs);
|
|
|
|
ol_kernel_launch_size_args_t EndLaunch{1, {1, 1, 1}, {1, 1, 1}, 0};
|
|
EndArgs EndArgs = {};
|
|
launchKernel(Queue, Device, Program, "_end", EndLaunch, EndArgs);
|
|
|
|
int Ret;
|
|
OFFLOAD_ERR(olMemcpy(Queue, &Ret, Host, DevRet, Device, sizeof(int)));
|
|
OFFLOAD_ERR(olSyncQueue(Queue));
|
|
|
|
OFFLOAD_ERR(olMemFree(DevRet));
|
|
OFFLOAD_ERR(olMemFree(DevArgv));
|
|
OFFLOAD_ERR(olMemFree(DevEnvp));
|
|
OFFLOAD_ERR(olDestroyQueue(Queue));
|
|
OFFLOAD_ERR(olDestroyProgram(Program));
|
|
OFFLOAD_ERR(olShutDown());
|
|
|
|
return Ret;
|
|
}
|