[ELF] Optimize binary search in getSectionPiece (#187916)
Two optimizations to make getSectionPiece O(1) for common cases:
1. For non-string fixed-size merge sections, use direct computation
(offset / entsize) instead of binary search.
2. Pre-resolve piece indices for non-section Defined symbols during
splitSections. The piece index and intra-piece offset are packed
into Defined::value as ((pieceIdx+1) << 32) | intraPieceOffset,
replacing repeated binary searches (MarkLive, includeInSymtab,
getRelocTargetVA) with a single upfront resolution.
On x86-64, references to mergeable strings use local labels:
leaq .LC0(%rip), %rax # R_X86_64_PC32 .LC0-4
The relocations use non-section symbols and benefit from optimization 2.
On many other targets (e.g. AArch64), the addend is 0 and the assembler
adjusts such relocations to reference section symbols, which still use
binary search.
On a clang link (clang-relassert reproduce tarball, x86-64):
- --gc-sections: 1.05x as fast
This commit is contained in:
@@ -1548,13 +1548,25 @@ void MergeInputSection::splitIntoPieces() {
|
||||
}
|
||||
|
||||
// Return the section piece that `offset` refers to.
//
// `offset` is either a plain offset into this input section, or a value that
// splitSections has already resolved, in which case the bits above
// mergeValueShift hold pieceIdx + 1 and the lower bits hold the intra-piece
// offset.
SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) {
  // A non-zero upper part means the piece index is already known; undo the
  // +1 bias that distinguishes "piece 0" from "not pre-resolved".
  const uint32_t biasedIdx = offset >> mergeValueShift;
  if (biasedIdx != 0)
    return pieces[biasedIdx - 1];

  assert(offset < content().size());

  if (flags & SHF_STRINGS) {
    // String section: locate the first piece that starts past `offset`; the
    // piece immediately before it is the one containing `offset`.
    auto firstPast = partition_point(
        pieces, [=](SectionPiece p) { return p.inputOff <= offset; });
    return firstPast[-1];
  }

  // Non-string section with fixed-size records: the piece index follows
  // directly from the entry size.
  return pieces[offset / entsize];
}
|
||||
|
||||
// Return the offset in an output section for a given input offset.
|
||||
uint64_t MergeInputSection::getParentOffset(uint64_t offset) const {
|
||||
// Pre-resolved by splitSections: pieceIdx + 1 in upper bits,
|
||||
// intra-piece offset in lower bits.
|
||||
if (uint32_t idx = offset >> mergeValueShift)
|
||||
return pieces[idx - 1].outputOff +
|
||||
(offset & llvm::maskTrailingOnes<uint64_t>(mergeValueShift));
|
||||
const SectionPiece &piece = getSectionPiece(offset);
|
||||
return piece.outputOff + (offset - piece.inputOff);
|
||||
}
|
||||
|
||||
@@ -326,6 +326,10 @@ struct SectionPiece {
|
||||
|
||||
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
|
||||
|
||||
// Used by splitSections to pre-resolve section piece indexes. 32 bits of offset
|
||||
// support section pieces of up to 4GB.
|
||||
constexpr unsigned mergeValueShift = 32;
|
||||
|
||||
// This corresponds to a SHF_MERGE section of an input file.
|
||||
class MergeInputSection : public InputSectionBase {
|
||||
public:
|
||||
@@ -339,7 +343,8 @@ public:
|
||||
void splitIntoPieces();
|
||||
|
||||
// Translate an offset in the input section to an offset in the parent
|
||||
// MergeSyntheticSection.
|
||||
// MergeSyntheticSection. If the offset was pre-resolved by
|
||||
// splitSections (upper bits non-zero), this is O(1).
|
||||
uint64_t getParentOffset(uint64_t offset) const;
|
||||
|
||||
// Splittable sections are handled as a sequence of data
|
||||
|
||||
@@ -3809,6 +3809,26 @@ template <class ELFT> void elf::splitSections(Ctx &ctx) {
|
||||
else if (auto *eh = dyn_cast<EhInputSection>(sec))
|
||||
eh->split<ELFT>();
|
||||
}
|
||||
|
||||
// For non-section Defined symbols in merge sections, pre-resolve the piece
|
||||
// index to avoid potentially repeated binary search (MarkLive, RelocScan,
|
||||
// includeInSymtab). Encode each non-section Defined symbol's value as
|
||||
// ((pieceIdx + 1) << mergeValueShift) | intraPieceOffset.
|
||||
auto resolve = [](Defined *d) {
|
||||
auto *ms = dyn_cast_or_null<MergeInputSection>(d->section);
|
||||
if (!ms || d->isSection())
|
||||
return;
|
||||
SectionPiece &piece = ms->getSectionPiece(d->value);
|
||||
uint32_t idx = &piece - ms->pieces.data();
|
||||
uint64_t off = d->value - piece.inputOff;
|
||||
d->value = ((uint64_t)(idx + 1) << mergeValueShift) | off;
|
||||
};
|
||||
for (Symbol *sym : file->getLocalSymbols())
|
||||
if (auto *d = dyn_cast<Defined>(sym))
|
||||
resolve(d);
|
||||
for (Symbol *sym : file->getGlobalSymbols())
|
||||
if (auto *d = dyn_cast<Defined>(sym); d && d->file == file)
|
||||
resolve(d);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xffffffffffffffff is outside the section
|
||||
## .rodata.str1.1 is "abc\0" (4 bytes). offset<=size is accepted.
|
||||
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.rodata.str1.1): offset 0x5 is outside the section
|
||||
## .data.retain references .foo-1 as well.
|
||||
## .data.retain references .foo-2 as well.
|
||||
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xfffffffffffffffe is outside the section
|
||||
|
||||
## Test that --gc-sections with an out-of-bounds offset doesn't crash.
|
||||
|
||||
Reference in New Issue
Block a user