From 5710e418c335c99f1d1ea619a4622837553b53d5 Mon Sep 17 00:00:00 2001 From: Ben Dunbobbin Date: Wed, 11 Feb 2026 21:20:25 +0000 Subject: [PATCH] [DTLTO][Windows] Expand short 8.3 form paths in ThinLTO module IDs (#178303) Windows supports short 8.3 form filenames (e.g. `compile_commands.json` -> `COMPIL~1.JSO`) for legacy reasons. See: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#short-vs-long-names. Short 8.3 form paths are undesirable in distributed compilation scenarios because they are local mappings tied to a specific directory layout on a specific machine. As a result, they can break or defeat sandboxing and path-based isolation mechanisms used by distributed build systems. We have observed such failures with DTLTO even in simple scenarios. For example, on Windows, running: ``` clang.exe hello.c -flto=thin -fuse-ld=lld -fthinlto-distributor=fastbuild.exe -### ``` on my development machine reveals a short 8.3 form path being passed to LLD (output paraphrased): ``` ld.lld -o a.out -plugin-opt=thinlto --thinlto-distributor=fastbuild.exe \ --thinlto-remote-compiler=clang.exe C:\Users\DUNBOB~1\AppData\Local\Temp\hello-380d65.o ``` This behavior occurs because, on Windows, the system temporary directory is commonly derived from the `TMP`/`TEMP` environment variables. For historical compatibility reasons, these variables are often set to short 8.3 form paths, particularly on systems where user names exceed eight characters. Whilst it's possible for users to work around these issues, in practice, especially in automated and CI environments, users often have limited control over their environment. DTLTO generally tries to avoid embedding distribution-specific logic in the LLVM source tree, and this principle also applies to path normalization. However, on Windows, such short 8.3 form paths are undesirable for any distribution system. This normalization also cannot be delegated to distributors, as the ThinLTO module ID must be finalized early during LTO and cannot be modified later. Given this, normalizing away short 8.3 paths on Windows is a pragmatic, targeted improvement, even though path normalization is not something a toolchain would typically perform in the general case. SIE internal tracker: TOOLCHAIN-19185 --- cross-project-tests/dtlto/path.test | 92 ++++++++++++++++++++++++++ cross-project-tests/lit.site.cfg.py.in | 15 ++++- llvm/include/llvm/DTLTO/DTLTO.h | 7 +- llvm/lib/DTLTO/DTLTO.cpp | 68 ++++++++++++++++--- 4 files changed, 171 insertions(+), 11 deletions(-) create mode 100644 cross-project-tests/dtlto/path.test diff --git a/cross-project-tests/dtlto/path.test b/cross-project-tests/dtlto/path.test new file mode 100644 index 000000000000..e435fc6b0b58 --- /dev/null +++ b/cross-project-tests/dtlto/path.test @@ -0,0 +1,92 @@ +# REQUIRES: ld.lld,llvm-ar,tempshortpaths + +# Check that any short 8.3 form paths (containing '~' characters) in ThinLTO +# Module IDs are expanded prior to being passed to the distributor. +# +# This test is Windows-specific, as enforced by `tempshortpaths`. + +RUN: rm -rf %t && split-file %s %t && cd %t + +# Create an archive and a thin archive to confirm member paths are expanded. +RUN: %clang --target=x86_64-linux-gnu -c foo.c moo.c start.c -flto=thin -O2 +RUN: prospero-llvm-ar rcs foo.a foo.o +RUN: prospero-llvm-ar --thin rcs moo.a moo.o + +# Build with DTLTO. +RUN: %python in-83-dir.py \ +RUN: %clang --target=x86_64-linux-gnu -flto=thin -fuse-ld=lld -nostdlib \ +RUN: foo.a moo.a start.o -Wl,--save-temps \ +RUN: -fthinlto-distributor=%python \ +RUN: -Xthinlto-distributor=%llvm_src_root/utils/dtlto/local.py + +# Check that all short 8.3 form paths have been expanded. +RUN: FileCheck --input-file a.*.dist-file.json %s --check-prefix=TILDE +TILDE-NOT: ~ + +# It is important that cross-module inlining occurs for this test to show that +# Clang can successfully load the bitcode file dependencies recorded in the +# summary indices. Explicitly check that the expected importing has occurred. +RUN: llvm-dis *.1.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=FOO,MOO,START +RUN: llvm-dis *.2.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=FOO,MOO,START +RUN: llvm-dis *.3.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=FOO,MOO,START +FOO-DAG: foo.o +MOO-DAG: moo.o +START-DAG: start.o + +#--- foo.c +extern int moo(int), _start(int); +__attribute__((retain)) int foo(int x) { return x + moo(x) + _start(x); } + +#--- moo.c +extern int foo(int), _start(int); +__attribute__((retain)) int moo(int x) { return x + foo(x) + _start(x); } + +#--- start.c +extern int foo(int), moo(int); +__attribute__((retain)) int _start(int x) { return x + foo(x) + moo(x); } + +#--- in-83-dir.py +import os, shutil, sys, uuid, subprocess +from pathlib import Path +import ctypes +from ctypes import wintypes + + +def get_short_path(p): + g = ctypes.windll.kernel32.GetShortPathNameW + g.argtypes = [wintypes.LPCWSTR, wintypes.LPWSTR, wintypes.DWORD] + g.restype = wintypes.DWORD + n = g(os.path.abspath(p), None, 0) # First call gets required buffer size. + b = ctypes.create_unicode_buffer(n) + if g(os.path.abspath(p), b, n): # Second call fills buffer. + return b.value + raise ctypes.WinError() + + +temp = Path(os.environ["TEMP"]) +assert temp.is_dir() + +# Copy the CWD to a unique directory inside TEMP and ensure that one of the path +# components is long enough to have a distinct short 8.3 form. +# TEMP is likely to be on a drive that supports short 8.3 form paths. +d = (temp / str(uuid.uuid4()) / "veryverylong").resolve() +d.parent.mkdir(parents=True) +try: + shutil.copytree(Path.cwd(), d) + + # Replace the arguments of the command that name files with equivalents in + # our temp directory, prefixed with the short 8.3 form. + cmd = [ + a if Path(a).is_absolute() or not (d / a).is_file() else get_short_path(d / a) + for a in sys.argv[1:] + ] + + print(cmd) + + sys.exit(subprocess.run(cmd).returncode) + +finally: + shutil.rmtree(d.parent) diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in index 39458dfc79af..b8992b6dca45 100644 --- a/cross-project-tests/lit.site.cfg.py.in +++ b/cross-project-tests/lit.site.cfg.py.in @@ -1,7 +1,8 @@ @LIT_SITE_CFG_IN_HEADER@ -import sys +import shutil, sys, os, uuid import lit.util +from pathlib import Path config.targets_to_build = "@TARGETS_TO_BUILD@".split() config.llvm_src_root = "@LLVM_SOURCE_DIR@" @@ -22,7 +23,19 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" import lit.llvm + lit.llvm.initialize(lit_config, config) # Let the main config do the real work. lit_config.load_config(config, "@CROSS_PROJECT_TESTS_SOURCE_DIR@/lit.cfg.py") + +def are_short_names_enabled(path): + d = Path(path) / str(uuid.uuid4()) / "verylongdir" + d.mkdir(parents=True) + try: + return (d.parent / "verylo~1").exists() + finally: + shutil.rmtree(d.parent) + +if os.name == "nt" and are_short_names_enabled(config.environment.get("TEMP")): + config.available_features.add("tempshortpaths") diff --git a/llvm/include/llvm/DTLTO/DTLTO.h b/llvm/include/llvm/DTLTO/DTLTO.h index e80577aa1283..5a8566b71bc1 100644 --- a/llvm/include/llvm/DTLTO/DTLTO.h +++ b/llvm/include/llvm/DTLTO/DTLTO.h @@ -24,7 +24,9 @@ namespace lto { // standalone file. Similarly, for FatLTO objects, the bitcode is stored in a // section of the containing ELF object file. To address this, the class ensures // that an individual bitcode file exists for each input (by writing it out if -// necessary) and that the ModuleID is updated to point to it. +// necessary) and that the ModuleID is updated to point to it. Module IDs are +// also normalized on Windows to remove short 8.3 form paths that cannot be +// loaded on remote machines. // // The class ensures that lto::InputFile objects are preserved until enough of // the LTO pipeline has executed to determine the required per-module @@ -62,6 +64,9 @@ private: /// The output file to which this LTO invocation will contribute. StringRef LinkerOutputFile; + /// The normalized output directory, derived from LinkerOutputFile. + StringRef LinkerOutputDir; + /// Controls preservation of any created temporary files. bool SaveTemps; diff --git a/llvm/lib/DTLTO/DTLTO.cpp b/llvm/lib/DTLTO/DTLTO.cpp index 92b5bb362ba4..b5e233fe0ef5 100644 --- a/llvm/lib/DTLTO/DTLTO.cpp +++ b/llvm/lib/DTLTO/DTLTO.cpp @@ -27,6 +27,9 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" +#ifdef _WIN32 +#include "llvm/Support/Windows/WindowsSupport.h" +#endif #include @@ -55,6 +58,25 @@ Error save(lto::InputFile *Input, StringRef Path) { return save(MB.getBuffer(), Path); } +// Normalize and save a path. Aside from expanding Windows 8.3 short paths, +// no other normalization is currently required here. These paths are +// machine-local and break distribution systems; other normalization is +// handled by the DTLTO distributors. +Expected normalizePath(StringRef Path, StringSaver &Saver) { +#if defined(_WIN32) + if (Path.empty()) + return Path; + SmallString<256> Expanded; + if (std::error_code EC = llvm::sys::windows::makeLongFormPath(Path, Expanded)) + return createStringError(inconvertibleErrorCode(), + "Normalization failed for path %s: %s", + Path.str().c_str(), EC.message().c_str()); + return Saver.save(Expanded.str()); +#else + return Saver.save(Path); +#endif +} + // Compute the file path for a thin archive member. // // For thin archives, an archive member name is typically a file path relative @@ -122,11 +144,14 @@ Expected lto::DTLTO::isThinArchive(const StringRef ArchivePath) { // // This function performs the following tasks: // 1. Add the input file to the LTO object's list of input files. -// 2. For thin archive members, overwrite the module ID with the path to the -// member file on disk. -// 3. For archive members and FatLTO objects, overwrite the module ID with a -// unique path naming a file that will contain the member content. The file -// is created and populated later (see serializeInputs()). +// 2. For individual bitcode file inputs on Windows only, overwrite the module +// ID with a normalized path to remove short 8.3 form components. +// 3. For thin archive members, overwrite the module ID with the path +// (normalized on Windows) to the member file on disk. +// 4. For archive members and FatLTO objects, overwrite the module ID with a +// unique path (normalized on Windows) naming a file that will contain the +// member content. The file is created and populated later (see +// serializeInputs()). Expected> lto::DTLTO::addInput(std::unique_ptr InputPtr) { TimeTraceScope TimeScope("Add input for DTLTO"); @@ -136,12 +161,28 @@ lto::DTLTO::addInput(std::unique_ptr InputPtr) { auto &Input = InputFiles.back(); BitcodeModule &BM = Input->getPrimaryBitcodeModule(); + auto setIdFromPath = [&](StringRef Path) -> Error { + auto N = normalizePath(Path, Saver); + if (!N) + return N.takeError(); + BM.setModuleIdentifier(*N); + return Error::success(); + }; + StringRef ArchivePath = Input->getArchivePath(); // In most cases, the module ID already points to an individual bitcode file - // on disk, so no further preparation for distribution is required. - if (ArchivePath.empty() && !Input->isFatLTOObject()) + // on disk, so no further preparation for distribution is required. However, + // on Windows we overwite the module ID to expand Windows 8.3 short form + // paths. These paths are machine-local and break distribution systems; other + // normalization is handled by the DTLTO distributors. + if (ArchivePath.empty() && !Input->isFatLTOObject()) { +#if defined(_WIN32) + if (Error E = setIdFromPath(Input->getName())) + return std::move(E); +#endif return Input; + } // For a member of a thin archive that is not a FatLTO object, there is an // existing file on disk that can be used, so we can avoid having to @@ -154,16 +195,25 @@ lto::DTLTO::addInput(std::unique_ptr InputPtr) { // For thin archives, use the path to the actual member file on disk. auto MemberPath = computeThinArchiveMemberPath(ArchivePath, Input->getMemberName()); - BM.setModuleIdentifier(Saver.save(MemberPath.str())); + if (Error E = setIdFromPath(MemberPath)) + return std::move(E); return Input; } // A new file on disk will be needed for archive members and FatLTO objects. Input->setSerializeForDistribution(true); + // Get the normalized output directory, if we haven't already. + if (LinkerOutputDir.empty()) { + auto N = normalizePath(sys::path::parent_path(LinkerOutputFile), Saver); + if (!N) + return N.takeError(); + LinkerOutputDir = *N; + } + // Create a unique path by including the process ID and sequence number in the // filename. - SmallString<256> Id(sys::path::parent_path(LinkerOutputFile)); + SmallString<256> Id(LinkerOutputDir); sys::path::append(Id, Twine(sys::path::filename(Input->getName())) + "." + std::to_string(InputFiles.size()) /*Sequence number*/ +