[ELF] Parallelize input file loading (#191690)
During `createFiles`, `addFile()` records a `LoadJob` for each
non-script input (archive, relocatable, DSO, bitcode, binary) with a
state-machine snapshot (`inWholeArchive`, `inLib`, `asNeeded`,
`withLOption`, `groupId`) and expands them on worker threads in
`loadFiles()`. Linker scripts are still processed inline since their
`INPUT()` and `GROUP()` commands recursively call `addFile()`.
Outside `createFiles()`, `loadFiles()` is called with a single job and
drained immediately (`deferLoad` is false). Two cases:
- `addDependentLibrary()`: `.deplibs` sections trigger `addFile()`
during the serial `doParseFiles()` loop.
- `--just-symbols`: pushes files directly, bypassing
`addFile`/`LoadJob`.
Thread-safety:
- A mutex serializes `BitcodeFile` / fatLTO constructors that call
`ctx.saver` / `ctx.uniqueSaver`. Zero contention on pure ELF links.
- Thin-archive member buffers accumulate in per-job `SmallVector`s and
are merged into `ctx.memoryBuffers` in command-line order.
- `groupId` is pre-claimed during the serial walk and written to each
produced file after construction (the `InputFile` constructor no
longer reads `nextGroupId`).
Performance (--threads=8):
```
clang-relassert (267 thin archives, 10 .o, 2 .so):
965 +/- 32 ms -> 924 +/- 24 ms (1.05x, 80 runs)
(Apple M4) 249.7ms ± 2.5ms -> 221.2ms ± 1.4ms (1.13x, 10 runs)
chromium (532 .a, 3314 .o, 343 .so):
8.071 +/- 0.472 s -> 7.370 +/- 0.198 s (1.10x, 20 runs)
```
Parallelizing all file kinds (not just archives) matters for
.o-dominated workloads like chromium where archive-only parallelization
shows no improvement.
Output is byte-identical to the old lld and deterministic across
`--threads` values.
This commit is contained in:
@@ -175,6 +175,21 @@ struct VersionDefinition {
|
||||
SmallVector<SymbolVersion, 0> localPatterns;
|
||||
};
|
||||
|
||||
// Deferred file-load job: one per input, expanded by loadFiles().
|
||||
struct LoadJob {
|
||||
enum Kind : uint8_t { Obj, Bitcode, Archive, Shared, Binary };
|
||||
llvm::MemoryBufferRef mbref;
|
||||
llvm::StringRef path;
|
||||
Kind kind;
|
||||
bool inWholeArchive;
|
||||
bool lazy;
|
||||
bool asNeeded;
|
||||
bool withLOption;
|
||||
uint32_t groupId;
|
||||
SmallVector<std::unique_ptr<InputFile>, 0> out;
|
||||
std::vector<std::unique_ptr<llvm::MemoryBuffer>> thinBufs;
|
||||
};
|
||||
|
||||
class LinkerDriver {
|
||||
public:
|
||||
LinkerDriver(Ctx &ctx);
|
||||
@@ -186,17 +201,20 @@ public:
|
||||
private:
|
||||
Ctx &ctx;
|
||||
void createFiles(llvm::opt::InputArgList &args);
|
||||
void loadFiles();
|
||||
void inferMachineType();
|
||||
template <class ELFT> void link(llvm::opt::InputArgList &args);
|
||||
template <class ELFT> void compileBitcodeFiles(bool skipLinkedOutput);
|
||||
bool tryAddFatLTOFile(MemoryBufferRef mb, StringRef archiveName,
|
||||
uint64_t offsetInArchive, bool lazy);
|
||||
// True if we are in --whole-archive and --no-whole-archive.
|
||||
bool inWholeArchive = false;
|
||||
|
||||
// True if we are in --start-lib and --end-lib.
|
||||
bool inLib = false;
|
||||
|
||||
// True inside createFiles(): defers to loadFiles().
|
||||
bool deferLoad = false;
|
||||
SmallVector<LoadJob, 0> loadJobs;
|
||||
|
||||
std::unique_ptr<BitcodeCompiler> lto;
|
||||
SmallVector<std::unique_ptr<InputFile>, 0> files, ltoObjectFiles;
|
||||
|
||||
|
||||
@@ -195,7 +195,8 @@ static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(Ctx &ctx,
|
||||
// Returns slices of MB by parsing MB as an archive file.
|
||||
// Each slice consists of a member file in the archive.
|
||||
std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers(
|
||||
Ctx &ctx, MemoryBufferRef mb) {
|
||||
Ctx &ctx, LoadJob &job) {
|
||||
MemoryBufferRef mb = job.mbref;
|
||||
std::unique_ptr<Archive> file =
|
||||
CHECK(Archive::create(mb),
|
||||
mb.getBufferIdentifier() + ": failed to parse archive");
|
||||
@@ -218,32 +219,16 @@ std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers(
|
||||
<< ": Archive::children failed: " << std::move(err);
|
||||
|
||||
// Take ownership of memory buffers created for members of thin archives.
|
||||
std::vector<std::unique_ptr<MemoryBuffer>> mbs = file->takeThinBuffers();
|
||||
std::move(mbs.begin(), mbs.end(), std::back_inserter(ctx.memoryBuffers));
|
||||
job.thinBufs = file->takeThinBuffers();
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static bool isBitcode(MemoryBufferRef mb) {
|
||||
return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
|
||||
}
|
||||
|
||||
bool LinkerDriver::tryAddFatLTOFile(MemoryBufferRef mb, StringRef archiveName,
|
||||
uint64_t offsetInArchive, bool lazy) {
|
||||
if (!ctx.arg.fatLTOObjects)
|
||||
return false;
|
||||
Expected<MemoryBufferRef> fatLTOData =
|
||||
IRObjectFile::findBitcodeInMemBuffer(mb);
|
||||
if (errorToBool(fatLTOData.takeError()))
|
||||
return false;
|
||||
auto file = std::make_unique<BitcodeFile>(ctx, *fatLTOData, archiveName,
|
||||
offsetInArchive, lazy);
|
||||
file->obj->fatLTOObject(true);
|
||||
files.push_back(std::move(file));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Opens a file and create a file object. Path has to be resolved already.
|
||||
// Every regular input (not binary-format or linker scripts) is recorded as a
|
||||
// LoadJob. Inside createFiles() jobs batch up and are expanded in parallel at
|
||||
// the end. Outside createFiles() (e.g. addDependentLibrary during parseFiles)
|
||||
// the single job is expanded immediately.
|
||||
void LinkerDriver::addFile(StringRef path, bool withLOption) {
|
||||
using namespace sys::fs;
|
||||
|
||||
@@ -253,93 +238,59 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) {
|
||||
MemoryBufferRef mbref = *buffer;
|
||||
|
||||
if (ctx.arg.formatBinary) {
|
||||
files.push_back(std::make_unique<BinaryFile>(ctx, mbref));
|
||||
if (!isInGroup)
|
||||
++nextGroupId;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (identify_magic(mbref.getBuffer())) {
|
||||
case file_magic::unknown:
|
||||
readLinkerScript(ctx, mbref);
|
||||
return;
|
||||
case file_magic::archive: {
|
||||
auto members = getArchiveMembers(ctx, mbref);
|
||||
if (inWholeArchive) {
|
||||
for (const std::pair<MemoryBufferRef, uint64_t> &p : members) {
|
||||
if (isBitcode(p.first))
|
||||
files.push_back(std::make_unique<BitcodeFile>(ctx, p.first, path,
|
||||
p.second, false));
|
||||
else if (!tryAddFatLTOFile(p.first, path, p.second, false))
|
||||
files.push_back(createObjFile(ctx, p.first, path));
|
||||
loadJobs.push_back({mbref,
|
||||
path,
|
||||
LoadJob::Binary,
|
||||
/*inWholeArchive=*/false,
|
||||
/*lazy=*/false,
|
||||
/*asNeeded=*/false,
|
||||
/*withLOption=*/false,
|
||||
nextGroupId,
|
||||
{},
|
||||
{}});
|
||||
} else {
|
||||
auto magic = identify_magic(mbref.getBuffer());
|
||||
if (magic == file_magic::unknown) {
|
||||
readLinkerScript(ctx, mbref);
|
||||
return;
|
||||
}
|
||||
LoadJob::Kind kind;
|
||||
switch (magic) {
|
||||
case file_magic::archive:
|
||||
kind = LoadJob::Archive;
|
||||
break;
|
||||
case file_magic::elf_relocatable:
|
||||
kind = LoadJob::Obj;
|
||||
break;
|
||||
case file_magic::bitcode:
|
||||
kind = LoadJob::Bitcode;
|
||||
break;
|
||||
case file_magic::elf_shared_object:
|
||||
if (ctx.arg.isStatic) {
|
||||
Err(ctx) << "attempted static link of dynamic object " << path;
|
||||
return;
|
||||
}
|
||||
kind = LoadJob::Shared;
|
||||
break;
|
||||
default:
|
||||
Err(ctx) << path << ": unknown file type";
|
||||
return;
|
||||
}
|
||||
|
||||
archiveFiles.emplace_back(path, members.size());
|
||||
|
||||
// Handle archives and --start-lib/--end-lib using the same code path. This
|
||||
// scans all the ELF relocatable object files and bitcode files in the
|
||||
// archive rather than just the index file, with the benefit that the
|
||||
// symbols are only loaded once. For many projects archives see high
|
||||
// utilization rates and it is a net performance win. --start-lib scans
|
||||
// symbols in the same order that llvm-ar adds them to the index, so in the
|
||||
// common case the semantics are identical. If the archive symbol table was
|
||||
// created in a different order, or is incomplete, this strategy has
|
||||
// different semantics. Such output differences are considered user error.
|
||||
//
|
||||
// All files within the archive get the same group ID to allow mutual
|
||||
// references for --warn-backrefs.
|
||||
SaveAndRestore saved(isInGroup, true);
|
||||
for (const std::pair<MemoryBufferRef, uint64_t> &p : members) {
|
||||
auto magic = identify_magic(p.first.getBuffer());
|
||||
if (magic == file_magic::elf_relocatable) {
|
||||
if (!tryAddFatLTOFile(p.first, path, p.second, true))
|
||||
files.push_back(createObjFile(ctx, p.first, path, true));
|
||||
} else if (magic == file_magic::bitcode)
|
||||
files.push_back(
|
||||
std::make_unique<BitcodeFile>(ctx, p.first, path, p.second, true));
|
||||
else
|
||||
Warn(ctx) << path << ": archive member '"
|
||||
<< p.first.getBufferIdentifier()
|
||||
<< "' is neither ET_REL nor LLVM bitcode";
|
||||
}
|
||||
if (!saved.get())
|
||||
++nextGroupId;
|
||||
return;
|
||||
loadJobs.push_back({mbref,
|
||||
path,
|
||||
kind,
|
||||
inWholeArchive,
|
||||
inLib,
|
||||
ctx.arg.asNeeded,
|
||||
withLOption,
|
||||
nextGroupId,
|
||||
{},
|
||||
{}});
|
||||
}
|
||||
case file_magic::elf_shared_object: {
|
||||
if (ctx.arg.isStatic) {
|
||||
ErrAlways(ctx) << "attempted static link of dynamic object " << path;
|
||||
return;
|
||||
}
|
||||
|
||||
// Shared objects are identified by soname. soname is (if specified)
|
||||
// DT_SONAME and falls back to filename. If a file was specified by -lfoo,
|
||||
// the directory part is ignored. Note that path may be a temporary and
|
||||
// cannot be stored into SharedFile::soName.
|
||||
path = mbref.getBufferIdentifier();
|
||||
auto f = std::make_unique<SharedFile>(
|
||||
ctx, mbref, withLOption ? path::filename(path) : path);
|
||||
f->init();
|
||||
files.push_back(std::move(f));
|
||||
break;
|
||||
}
|
||||
case file_magic::bitcode:
|
||||
files.push_back(std::make_unique<BitcodeFile>(ctx, mbref, "", 0, inLib));
|
||||
break;
|
||||
case file_magic::elf_relocatable:
|
||||
if (!tryAddFatLTOFile(mbref, "", 0, inLib))
|
||||
files.push_back(createObjFile(ctx, mbref, "", inLib));
|
||||
break;
|
||||
default:
|
||||
ErrAlways(ctx) << path << ": unknown file type";
|
||||
return;
|
||||
}
|
||||
// All files within the same --{start,end}-group get the same group ID.
|
||||
// Otherwise, a new file will get a new group ID.
|
||||
if (!isInGroup)
|
||||
++nextGroupId;
|
||||
if (!deferLoad)
|
||||
loadFiles();
|
||||
}
|
||||
|
||||
// Add a given library by searching it from input search paths.
|
||||
@@ -2160,8 +2111,108 @@ static bool isFormatBinary(Ctx &ctx, StringRef s) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Expand LoadJob entries recorded by addFile(). Called in batch from
|
||||
// createFiles() (parallel), or immediately from addFile() for late additions
|
||||
// like dependent libraries (single job, runs inline).
|
||||
void LinkerDriver::loadFiles() {
|
||||
// BitcodeFile / fatLTO constructors call ctx.saver which is not thread-safe.
|
||||
// SharedFile and ObjFile constructors are safe without the mutex.
|
||||
std::mutex mu;
|
||||
auto makeFile = [&](MemoryBufferRef mb, file_magic magic, StringRef arPath,
|
||||
uint64_t offset,
|
||||
bool lazy) -> std::unique_ptr<InputFile> {
|
||||
if (magic == file_magic::bitcode) {
|
||||
std::lock_guard<std::mutex> lk(mu);
|
||||
return std::make_unique<BitcodeFile>(ctx, mb, arPath, offset, lazy);
|
||||
}
|
||||
if (ctx.arg.fatLTOObjects) {
|
||||
Expected<MemoryBufferRef> fatLTOData =
|
||||
IRObjectFile::findBitcodeInMemBuffer(mb);
|
||||
if (!errorToBool(fatLTOData.takeError())) {
|
||||
std::lock_guard<std::mutex> lk(mu);
|
||||
auto f = std::make_unique<BitcodeFile>(ctx, *fatLTOData, arPath, offset,
|
||||
lazy);
|
||||
f->obj->fatLTOObject(true);
|
||||
return f;
|
||||
}
|
||||
}
|
||||
return createObjFile(ctx, mb, arPath, lazy);
|
||||
};
|
||||
|
||||
{
|
||||
llvm::TimeTraceScope timeScope("Parallel load");
|
||||
parallelFor(0, loadJobs.size(), [&](size_t i) {
|
||||
LoadJob &job = loadJobs[i];
|
||||
switch (job.kind) {
|
||||
case LoadJob::Obj:
|
||||
case LoadJob::Bitcode:
|
||||
job.out.push_back(makeFile(job.mbref,
|
||||
job.kind == LoadJob::Bitcode
|
||||
? file_magic::bitcode
|
||||
: file_magic::elf_relocatable,
|
||||
"", 0, job.lazy));
|
||||
break;
|
||||
case LoadJob::Archive: {
|
||||
// Scan all archive members rather than using the archive symbol
|
||||
// index. We assume the archive symbol table order matches the order
|
||||
// of symbols in the member symbol tables. All files within the
|
||||
// archive share the same group ID to allow mutual references for
|
||||
// --warn-backrefs.
|
||||
auto members = getArchiveMembers(ctx, job);
|
||||
job.out.reserve(members.size());
|
||||
bool lazy = !job.inWholeArchive;
|
||||
for (const auto &[mb, offset] : members) {
|
||||
auto mm = identify_magic(mb.getBuffer());
|
||||
if (mm == file_magic::elf_relocatable || mm == file_magic::bitcode ||
|
||||
job.inWholeArchive)
|
||||
job.out.push_back(makeFile(mb, mm, job.path, offset, lazy));
|
||||
else
|
||||
Warn(ctx) << job.path << ": archive member '"
|
||||
<< mb.getBufferIdentifier()
|
||||
<< "' is neither ET_REL nor LLVM bitcode";
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LoadJob::Shared: {
|
||||
// Shared objects are identified by soname. soname is (if specified)
|
||||
// DT_SONAME and falls back to filename. If a file was specified by
|
||||
// -lfoo, the directory part is ignored.
|
||||
StringRef bufPath = job.mbref.getBufferIdentifier();
|
||||
auto f = std::make_unique<SharedFile>(
|
||||
ctx, job.mbref,
|
||||
job.withLOption ? path::filename(bufPath) : bufPath);
|
||||
f->init();
|
||||
f->isNeeded = !job.asNeeded;
|
||||
job.out.push_back(std::move(f));
|
||||
break;
|
||||
}
|
||||
case LoadJob::Binary:
|
||||
job.out.push_back(std::make_unique<BinaryFile>(ctx, job.mbref));
|
||||
break;
|
||||
}
|
||||
for (auto &m : job.out)
|
||||
m->groupId = job.groupId;
|
||||
});
|
||||
}
|
||||
|
||||
size_t numFiles = 0;
|
||||
for (auto &job : loadJobs)
|
||||
numFiles += job.out.size();
|
||||
files.reserve(files.size() + numFiles);
|
||||
for (auto &job : loadJobs) {
|
||||
if (job.kind == LoadJob::Archive)
|
||||
archiveFiles.emplace_back(job.path, (unsigned)job.out.size());
|
||||
files.append(std::make_move_iterator(job.out.begin()),
|
||||
std::make_move_iterator(job.out.end()));
|
||||
ctx.memoryBuffers.append(std::make_move_iterator(job.thinBufs.begin()),
|
||||
std::make_move_iterator(job.thinBufs.end()));
|
||||
}
|
||||
loadJobs.clear();
|
||||
}
|
||||
|
||||
void LinkerDriver::createFiles(opt::InputArgList &args) {
|
||||
llvm::TimeTraceScope timeScope("Load input files");
|
||||
SaveAndRestore saveDefer(deferLoad, true);
|
||||
// For --{push,pop}-state.
|
||||
std::vector<std::tuple<bool, bool, bool>> stack;
|
||||
|
||||
@@ -2283,6 +2334,7 @@ void LinkerDriver::createFiles(opt::InputArgList &args) {
|
||||
|
||||
if (defaultScript && !hasScript)
|
||||
readLinkerScript(ctx, *defaultScript);
|
||||
loadFiles();
|
||||
if (files.empty() && !hasInput && errCount(ctx) == 0)
|
||||
ErrAlways(ctx) << "no input files";
|
||||
}
|
||||
|
||||
@@ -207,7 +207,7 @@ static void updateSupportedARMFeatures(Ctx &ctx,
|
||||
}
|
||||
|
||||
InputFile::InputFile(Ctx &ctx, Kind k, MemoryBufferRef m)
|
||||
: ctx(ctx), mb(m), groupId(ctx.driver.nextGroupId), fileKind(k) {}
|
||||
: ctx(ctx), mb(m), fileKind(k) {}
|
||||
|
||||
InputFile::~InputFile() {}
|
||||
|
||||
|
||||
@@ -130,7 +130,7 @@ public:
|
||||
// checking feature. All files within the same --{start,end}-group or
|
||||
// --{start,end}-lib get the same group ID. Otherwise, each file gets a new
|
||||
// group ID. For more info, see checkDependency() in SymbolTable.cpp.
|
||||
uint32_t groupId;
|
||||
uint32_t groupId = 0;
|
||||
|
||||
// If this is an architecture-specific file, the following members
|
||||
// have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type.
|
||||
|
||||
Reference in New Issue
Block a user