[ELF] Optimize binary search in getSectionPiece (#187916)

Two optimizations to make getSectionPiece O(1) for common cases:

1. For non-string fixed-size merge sections, use direct computation
   (offset / entsize) instead of binary search.

2. Pre-resolve piece indices for non-section Defined symbols during
   splitSections. The piece index and intra-piece offset are packed
   into Defined::value as ((pieceIdx+1) << 32) | intraPieceOffset,
   replacing repeated binary searches (MarkLive, includeInSymtab,
   getRelocTargetVA) with a single upfront resolution.

On x86-64, references to mergeable strings use local labels:

    leaq .LC0(%rip), %rax  # R_X86_64_PC32 .LC0-4

The relocations use non-section symbols and benefit from optimization 2.
On many other targets (e.g. AArch64), the addend is 0 and the assembler
adjusts such relocations to reference section symbols, which still use
binary search.

On a clang link (clang-relassert reproduce tarball, x86-64):
- --gc-sections: 1.05x as fast
This commit is contained in:
Fangrui Song
2026-03-30 20:51:30 -07:00
committed by GitHub
parent 1ec7e86b3a
commit 42cc454777
4 changed files with 39 additions and 2 deletions

View File

@@ -1548,13 +1548,25 @@ void MergeInputSection::splitIntoPieces() {
}
// Return the piece that `offset` refers to. `offset` is either a plain input
// section offset or a value pre-resolved by splitSections.
SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) {
  // A pre-resolved value stores pieceIdx + 1 above mergeValueShift and the
  // intra-piece offset below it, so a non-zero upper part is a direct index.
  const uint32_t encodedIdx = offset >> mergeValueShift;
  if (encodedIdx != 0)
    return pieces[encodedIdx - 1];

  assert(offset < content().size());

  // Non-string sections consist of fixed-size records, so the piece index is
  // simply offset / entsize.
  if ((flags & SHF_STRINGS) == 0)
    return pieces[offset / entsize];

  // String sections: binary search for the first piece that starts after
  // `offset`; the piece just before it is the one we want.
  auto firstPast = partition_point(
      pieces, [offset](const SectionPiece &p) { return p.inputOff <= offset; });
  return firstPast[-1];
}
// Map an input offset to the corresponding offset in the output section.
// `offset` may be a plain input offset or a value pre-resolved by
// splitSections (pieceIdx + 1 in the upper bits, intra-piece offset in the
// lower bits).
uint64_t MergeInputSection::getParentOffset(uint64_t offset) const {
  const uint32_t encodedIdx = offset >> mergeValueShift;
  if (encodedIdx == 0) {
    // Not pre-resolved: look the piece up and rebase the offset onto it.
    const SectionPiece &piece = getSectionPiece(offset);
    return piece.outputOff + (offset - piece.inputOff);
  }
  // Pre-resolved: the low bits already hold the offset within the piece.
  const uint64_t intraPieceOff =
      offset & llvm::maskTrailingOnes<uint64_t>(mergeValueShift);
  return pieces[encodedIdx - 1].outputOff + intraPieceOff;
}

View File

@@ -326,6 +326,10 @@ struct SectionPiece {
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
// Bit position that splits a symbol value pre-resolved by splitSections:
// the bits at and above mergeValueShift hold pieceIdx + 1, and the low
// mergeValueShift bits hold the offset within the piece. A zero upper part
// means the value is a plain, unresolved offset. 32 low bits support
// intra-piece offsets below 4 GiB.
// Declared `inline` so every translation unit including this header refers to
// the same entity.
inline constexpr unsigned mergeValueShift = 32;
// This corresponds to a SHF_MERGE section of an input file.
class MergeInputSection : public InputSectionBase {
public:
@@ -339,7 +343,8 @@ public:
void splitIntoPieces();
// Translate an offset in the input section to an offset in the parent
// MergeSyntheticSection.
// MergeSyntheticSection. If the offset was pre-resolved by
// splitSections (upper bits non-zero), this is O(1).
uint64_t getParentOffset(uint64_t offset) const;
// Splittable sections are handled as a sequence of data

View File

@@ -3809,6 +3809,26 @@ template <class ELFT> void elf::splitSections(Ctx &ctx) {
else if (auto *eh = dyn_cast<EhInputSection>(sec))
eh->split<ELFT>();
}
// Pre-resolve the piece index of every non-section Defined symbol that lives
// in a merge section, so later consumers (MarkLive, RelocScan,
// includeInSymtab) do not repeat the lookup. The symbol value is rewritten as
// ((pieceIdx + 1) << mergeValueShift) | intraPieceOffset.
auto resolve = [](Defined *d) {
  // Section symbols keep their plain offset.
  if (d->isSection())
    return;
  auto *mergeSec = dyn_cast_or_null<MergeInputSection>(d->section);
  if (mergeSec == nullptr)
    return;

  SectionPiece &piece = mergeSec->getSectionPiece(d->value);
  const uint64_t intraPieceOff = d->value - piece.inputOff;
  const uint32_t pieceIdx = &piece - mergeSec->pieces.data();
  // The index is stored biased by one so that a zero upper part still means
  // "not pre-resolved".
  d->value =
      (static_cast<uint64_t>(pieceIdx + 1) << mergeValueShift) | intraPieceOff;
};
for (Symbol *sym : file->getLocalSymbols())
if (auto *d = dyn_cast<Defined>(sym))
resolve(d);
for (Symbol *sym : file->getGlobalSymbols())
if (auto *d = dyn_cast<Defined>(sym); d && d->file == file)
resolve(d);
});
}

View File

@@ -12,7 +12,7 @@
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xffffffffffffffff is outside the section
## .rodata.str1.1 is "abc\0" (4 bytes). offset<=size is accepted.
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.rodata.str1.1): offset 0x5 is outside the section
## .data.retain references .foo-1 as well.
## .data.retain references .foo-2 as well.
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xfffffffffffffffe is outside the section
## Test that --gc-sections with an out-of-bounds offset doesn't crash.