Files
llvm-project/clang/tools/offload-arch/OffloadArch.cpp
Yaxun (Sam) Liu f93fcde52b [offload-arch] Fix amdgpu-arch crash on Windows with ROCm 7.1 (#167695)
The tool was crashing on Windows with ROCm 7.1 due to two issues: misuse
of hipDeviceGet, which should not have been called (it worked before by
accident but was undefined behavior), and an ABI incompatibility caused by
hipDeviceProp_t struct layout changes between HIP versions, where the
gcnArchName offset changed from 396 to 1160 bytes.

The fix removes hipDeviceGet and queries properties directly by device
index. It defines separate struct layouts for R0600 (HIP 6.x+) and R0000
(legacy) to handle the different memory layouts correctly.

An automatic API fallback mechanism tries R0600, then R0000, then the
unversioned API until one succeeds, ensuring compatibility across
different HIP runtime versions. A new --hip-api-version option allows
manually selecting the API version when needed.

Additional improvements include enhanced error handling with
hipGetErrorString, verbose logging throughout the detection process, and
runtime version detection using hipRuntimeGetVersion when available. The
versioned API functions provide stable ABI across HIP versions.

Fixes: SWDEV-564272
2025-11-13 19:03:21 -05:00

96 lines
2.8 KiB
C++

//===- OffloadArch.cpp - list available GPUs ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/Version.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include <array>
#include <utility>
using namespace llvm;
// `-h` is registered as a hidden alias for `-help`. main() tests this flag
// after parsing and prints the help message itself.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
// Mark all our options with this category.
static cl::OptionCategory OffloadArchCategory("offload-arch options");
// Option category declared here but not defined in this file. main() passes
// it, together with OffloadArchCategory, to cl::HideUnrelatedOptions so that
// options in either category stay visible.
extern cl::OptionCategory AMDGPUArchByHIPCategory;
// Vendor selector used by the `only` option and by VendorTable.
// The enumerators are referenced unqualified by clEnumVal() in the `only`
// option below, so this must remain an unscoped enum and the enumerator
// spellings are user-visible option values.
enum VendorName {
all,    // No filtering: every vendor in VendorTable is queried.
amdgpu, // AMD GPUs only.
nvptx,  // NVIDIA GPUs only.
intel,  // Intel GPUs only.
};
// The `only` option restricts detection to a single vendor. It defaults to
// `all`. main() may overwrite the parsed value when the executable is invoked
// under a legacy tool name.
static cl::opt<VendorName>
Only("only", cl::desc("Restrict to vendor:"), cl::cat(OffloadArchCategory),
cl::init(all),
cl::values(clEnumVal(all, "Print all GPUs (default)"),
clEnumVal(amdgpu, "Only print AMD GPUs"),
clEnumVal(nvptx, "Only print NVIDIA GPUs"),
clEnumVal(intel, "Only print Intel GPUs")));
// The `verbose` flag, off by default. It is not read anywhere in this file.
// NOTE(review): unlike the other options it is not `static`, presumably so
// the vendor-specific detection sources can reference it via `extern` --
// confirm against those files.
cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"),
cl::init(false), cl::cat(OffloadArchCategory));
/// Version printer installed by main() through cl::SetVersionPrinter.
///
/// Writes the clang tool version banner for "offload-arch" to \p OS, followed
/// by a single newline.
static void PrintVersion(raw_ostream &OS) {
  const auto Banner = clang::getClangToolFullVersion("offload-arch");
  OS << Banner;
  OS << '\n';
}
// Detection entry points, one per backend. They are declared here but not
// defined in this file. Callers in this file treat a return value of 0 as
// success (see printAMD and main).
int printGPUsByKFD();
int printGPUsByHIP();
int printGPUsByCUDA();
int printGPUsByLevelZero();
/// Prints AMD GPUs.
///
/// On non-Windows hosts printGPUsByKFD() is tried first, and a zero result
/// from it is returned immediately. In every other case (Windows, or a
/// nonzero KFD result) the result of printGPUsByHIP() is returned.
static int printAMD() {
#ifndef _WIN32
  const int KFDStatus = printGPUsByKFD();
  if (KFDStatus == 0)
    return 0;
#endif
  const int HIPStatus = printGPUsByHIP();
  return HIPStatus;
}
/// Prints NVIDIA GPUs by forwarding to printGPUsByCUDA() and returning its
/// status unchanged.
static int printNVIDIA() {
  const int Status = printGPUsByCUDA();
  return Status;
}
/// Prints Intel GPUs by forwarding to printGPUsByLevelZero() and returning its
/// status unchanged.
static int printIntel() {
  const int Status = printGPUsByLevelZero();
  return Status;
}
// Maps each concrete vendor to the function that prints its GPUs. main()
// iterates this table in declaration order and invokes every entry that
// matches the `only` selection, so the order here is the order in which
// vendors are queried. `all` has no entry because it is a filter value, not
// a vendor.
const std::array<std::pair<VendorName, function_ref<int()>>, 3> VendorTable{
{{VendorName::amdgpu, printAMD},
{VendorName::nvptx, printNVIDIA},
{VendorName::intel, printIntel}}};
int main(int argc, char *argv[]) {
cl::HideUnrelatedOptions({&OffloadArchCategory, &AMDGPUArchByHIPCategory});
cl::SetVersionPrinter(PrintVersion);
cl::ParseCommandLineOptions(
argc, argv,
"A tool to detect the presence of offloading devices on the system. \n\n"
"The tool will output each detected GPU architecture separated by a\n"
"newline character. If multiple GPUs of the same architecture are found\n"
"a string will be printed for each\n");
if (Help) {
cl::PrintHelpMessage();
return 0;
}
// Support legacy binaries.
if (sys::path::stem(argv[0]).starts_with("amdgpu-arch"))
Only = VendorName::amdgpu;
if (sys::path::stem(argv[0]).starts_with("nvptx-arch"))
Only = VendorName::nvptx;
int Result = 1;
for (auto [Name, Func] : VendorTable) {
if (Only == VendorName::all || Only == Name)
Result &= Func();
}
return Result;
}