[ELF] Handle INCLUDE like a call stack (#193427)

The lexer maintains a stack of buffers, which allows a construct
started in an INCLUDE'd file to be closed by the parent. This causes
malformed scripts to be spuriously accepted (e.g. a bare assignment with
no trailing `;` in the include, terminated by the parent's `;` after
`INCLUDE`) and leads to undefined behavior when `readAssignment`
computes the `commandString` span from pointers into two different
buffers (issue #190376).

Force each INCLUDE to fully parse its own content, similar to a call
stack frame. `ScriptLexer::lex` no longer auto-pops on EOF; the
`buffers` and `mbs` members and `getCurrentMB()` are gone. `readInclude`
takes a `function_ref<void()>` callback, and the four call sites
(top-level, SECTIONS, output section, MEMORY) pass a context-appropriate
parser.

With this, each buffer contains only complete constructs by
construction, so both ends of the `[oldS, curTok)` pointer range in
`readAssignment` lie in the same buffer and the range no longer needs a
guard.
This commit is contained in:
Fangrui Song
2026-04-22 19:59:00 -07:00
committed by GitHub
parent 96bc719fba
commit 2855525c4a
7 changed files with 110 additions and 57 deletions

View File

@@ -52,14 +52,13 @@ ScriptLexer::Buffer::Buffer(Ctx &ctx, MemoryBufferRef mb)
}
ScriptLexer::ScriptLexer(Ctx &ctx, MemoryBufferRef mb)
: ctx(ctx), curBuf(ctx, mb), mbs(1, mb) {
: ctx(ctx), curBuf(ctx, mb) {
activeFilenames.insert(mb.getBufferIdentifier());
}
// Returns a whole line containing the current token.
StringRef ScriptLexer::getLine() {
StringRef s = getCurrentMB().getBuffer();
StringRef s(curBuf.begin, curBuf.s.end() - curBuf.begin);
size_t pos = s.rfind('\n', prevTok.data() - s.data());
if (pos != StringRef::npos)
s = s.substr(pos + 1);
@@ -72,8 +71,7 @@ size_t ScriptLexer::getColumnNumber() {
}
std::string ScriptLexer::getCurrentLocation() {
std::string filename = std::string(getCurrentMB().getBufferIdentifier());
return (filename + ":" + Twine(prevTokLine)).str();
return (curBuf.filename + ":" + Twine(prevTokLine)).str();
}
// We don't want to record cascading errors. Keep only the first one.
@@ -93,15 +91,10 @@ void ScriptLexer::lex() {
StringRef &s = curBuf.s;
s = skipSpace(s);
if (s.empty()) {
// If this buffer is from an INCLUDE command, switch to the "return
// value"; otherwise, mark EOF.
if (buffers.empty()) {
eof = true;
return;
}
activeFilenames.erase(curBuf.filename);
curBuf = buffers.pop_back_val();
continue;
// If this buffer is from an INCLUDE, the caller is responsible for
// popping to the parent buffer.
eof = true;
return;
}
curTokState = lexState;
@@ -275,17 +268,3 @@ ScriptLexer::Token ScriptLexer::till(StringRef tok) {
setError("unexpected EOF");
return {};
}
// Returns true if S encloses T.
static bool encloses(StringRef s, StringRef t) {
return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
}
MemoryBufferRef ScriptLexer::getCurrentMB() {
// Find input buffer containing the current token.
assert(!mbs.empty());
for (MemoryBufferRef mb : mbs)
if (encloses(mb.getBuffer(), curBuf.s))
return mb;
llvm_unreachable("getCurrentMB: failed to find a token");
}

View File

@@ -14,7 +14,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBufferRef.h"
#include <vector>
namespace lld::elf {
struct Ctx;
@@ -34,9 +33,9 @@ protected:
Buffer(Ctx &ctx, MemoryBufferRef mb);
};
Ctx &ctx;
// The current buffer and parent buffers due to INCLUDE.
// The currently lexed buffer. INCLUDE runs a nested parse on a new `Buffer`,
// similar to a call stack frame.
Buffer curBuf;
SmallVector<Buffer, 0> buffers;
// Used to detect INCLUDE() cycles.
llvm::DenseSet<StringRef> activeFilenames;
@@ -80,9 +79,6 @@ public:
void expect(StringRef expect);
Token till(StringRef tok);
std::string getCurrentLocation();
MemoryBufferRef getCurrentMB();
std::vector<MemoryBufferRef> mbs;
private:
StringRef getLine();

View File

@@ -59,7 +59,7 @@ private:
void readEntry();
void readExtern();
void readGroup();
void readInclude();
void readInclude(llvm::function_ref<void()> parse);
void readInput();
void readLinkerScriptStmt(StringRef tok);
void readMemory();
@@ -74,6 +74,7 @@ private:
void readSections();
void readSectionsStmt(SmallVectorImpl<SectionCommand *> &v, StringRef tok);
void readOutputSectionStmt(OutputSection &osec, StringRef tok);
void readStmts(llvm::function_ref<void(StringRef)> readStmt);
void readTarget();
void readVersion();
void readVersionScriptCommand();
@@ -239,12 +240,7 @@ void ScriptParser::readVersion() {
}
void ScriptParser::readLinkerScript() {
while (!atEOF()) {
StringRef tok = next();
if (atEOF())
break;
readLinkerScriptStmt(tok);
}
readStmts([&](StringRef t) { readLinkerScriptStmt(t); });
}
void ScriptParser::readLinkerScriptStmt(StringRef tok) {
@@ -258,7 +254,8 @@ void ScriptParser::readLinkerScriptStmt(StringRef tok) {
} else if (tok == "GROUP") {
readGroup();
} else if (tok == "INCLUDE") {
readInclude();
readInclude(
[&] { readStmts([&](StringRef t) { readLinkerScriptStmt(t); }); });
} else if (tok == "INPUT") {
readInput();
} else if (tok == "MEMORY") {
@@ -303,8 +300,7 @@ void ScriptParser::readDefsym() {
Expr e = readExpr();
if (!atEOF())
setError("EOF expected, but got " + next());
auto *cmd = make<SymbolAssignment>(
name, e, 0, getCurrentMB().getBufferIdentifier().str());
auto *cmd = make<SymbolAssignment>(name, e, 0, curBuf.filename.str());
ctx.script->sectionCommands.push_back(cmd);
}
@@ -346,8 +342,7 @@ void ScriptParser::addFile(StringRef s) {
ctx.driver.addLibrary(s.substr(2));
} else {
// Case 4: s is a relative path. Search in the directory of the script file.
std::string filename = std::string(getCurrentMB().getBufferIdentifier());
StringRef directory = sys::path::parent_path(filename);
StringRef directory = sys::path::parent_path(curBuf.filename);
if (!directory.empty()) {
SmallString<0> path(directory);
sys::path::append(path, s);
@@ -400,22 +395,41 @@ void ScriptParser::readGroup() {
++ctx.driver.nextGroupId;
}
void ScriptParser::readInclude() {
void ScriptParser::readInclude(llvm::function_ref<void()> parse) {
StringRef name = readName();
if (!activeFilenames.insert(name).second) {
setError("there is a cycle in linker script INCLUDEs");
return;
}
if (std::optional<std::string> path = searchScript(ctx, name)) {
if (std::optional<MemoryBufferRef> mb = readFile(ctx, *path)) {
buffers.push_back(curBuf);
curBuf = Buffer(ctx, *mb);
mbs.push_back(*mb);
}
std::optional<std::string> path = searchScript(ctx, name);
if (!path) {
setError("cannot find linker script " + name);
return;
}
setError("cannot find linker script " + name);
std::optional<MemoryBufferRef> mb = readFile(ctx, *path);
if (!mb)
return;
SaveAndRestore savedBuf(curBuf, Buffer(ctx, *mb));
SaveAndRestore savedPrevTok(prevTok, StringRef());
SaveAndRestore savedPrevTokLine(prevTokLine, size_t(1));
parse();
// parse() leaves `eof` true on normal completion; reset so the parent
// buffer continues to be lexed.
eof = false;
activeFilenames.erase(name);
}
// Drive `readStmt` on each token until EOF of the current buffer.
void ScriptParser::readStmts(llvm::function_ref<void(StringRef)> readStmt) {
while (!atEOF()) {
StringRef tok = next();
if (atEOF())
return;
readStmt(tok);
}
}
void ScriptParser::readInput() {
@@ -707,7 +721,8 @@ void ScriptParser::readSectionsStmt(SmallVectorImpl<SectionCommand *> &v,
return;
}
if (tok == "INCLUDE") {
readInclude();
readInclude(
[&] { readStmts([&](StringRef t) { readSectionsStmt(v, t); }); });
return;
}
@@ -1097,7 +1112,9 @@ void ScriptParser::readOutputSectionStmt(OutputSection &osec, StringRef tok) {
} else if (tok == "SORT") {
readSort();
} else if (tok == "INCLUDE") {
readInclude();
readInclude([&] {
readStmts([&](StringRef t) { readOutputSectionStmt(osec, t); });
});
} else if (tok == "(" || tok == ")") {
setError("expected filename pattern");
} else if (peek() == "(") {
@@ -1856,7 +1873,7 @@ void ScriptParser::readMemory() {
void ScriptParser::readMemoryStmt(StringRef tok) {
if (tok == "INCLUDE") {
readInclude();
readInclude([&] { readStmts([&](StringRef t) { readMemoryStmt(t); }); });
return;
}

View File

@@ -0,0 +1,18 @@
# REQUIRES: x86
# RUN: rm -rf %t && split-file %s %t && cd %t
# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
## A stray ';' in the parent after INCLUDE cannot complete the inner assignment.
# RUN: not ld.lld a.o -T top.lds 2>&1 | FileCheck %s --check-prefix=TOP
# TOP: error: inc-top.lds:1: unexpected EOF
#--- top.lds
INCLUDE "inc-top.lds";
#--- inc-top.lds
foo = 1
#--- a.s
.globl _start
_start:
ret

View File

@@ -17,6 +17,17 @@
# EMPTY: LOAD {{.*}} 0x0000000000001000 0x0000000000001000 {{.*}} R E
# EMPTY-NEXT: LOAD {{.*}} 0x0000000000002000 0x0000000000002000 {{.*}} RW
## A region declaration truncated mid-expression cannot be completed by the
## parent MEMORY { ... }.
# RUN: cp trunc.lds inc.lds
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=TRUNC
# TRUNC: error: inc.lds:1: unexpected EOF
## A stray '}' in the include cannot close the parent MEMORY { ... }.
# RUN: cp brace.lds inc.lds
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=BRACE
# BRACE: error: inc.lds:1: unexpected EOF
#--- a.s
.section .text,"ax"
.global _start
@@ -54,3 +65,9 @@ SECTIONS {
}
#--- inc-empty.lds
#--- trunc.lds
RAM3 : ORIGIN = 0x4000, LENGTH
#--- brace.lds
}

View File

@@ -16,6 +16,12 @@
# RUN: llvm-objdump --section-headers a.out | FileCheck %s --check-prefix=CHECK2
# CHECK2: .data 00000010 0000000000002000 DATA
## A BYTE() with an unclosed paren in the include cannot be completed by the
## parent output-section body.
# RUN: cp trunc.lds inc.lds
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=TRUNC
# TRUNC: error: inc.lds:1: unexpected EOF
#--- a.s
.section .text,"ax"
.global _start
@@ -42,3 +48,6 @@ SECTIONS {
#--- full.lds
QUAD(0)
#--- trunc.lds
BYTE(42

View File

@@ -19,6 +19,17 @@
# CHECK2-NEXT: .data2 00000008 0000000000002008 DATA
# CHECK2-NEXT: .data3 00000008 0000000000002010 DATA
## An unclosed output section in the include cannot be closed by the outer
## SECTIONS { ... } '}'.
# RUN: cp trunc.lds inc.lds
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=TRUNC
# TRUNC: error: inc.lds:1: unexpected EOF
## A stray '}' in the include cannot close the parent SECTIONS { ... }.
# RUN: cp brace.lds inc.lds
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=BRACE
# BRACE: error: inc.lds:1: unexpected EOF
#--- a.s
.global _start
_start: nop
@@ -43,3 +54,9 @@ SECTIONS {
#--- full.lds
.data2 : { QUAD(0) } > RAM
#--- trunc.lds
.text : { *(.text*)
#--- brace.lds
}