[ELF] Handle INCLUDE like a call stack (#193427)
The lexer maintains a stack of buffers, which allows a construct started in an INCLUDE'd file to be closed by the parent. As a result, malformed scripts are spuriously accepted (e.g. a bare assignment with no trailing `;` in the include, terminated by the parent's `;` after `INCLUDE`), and the span computations for `commandString` in `readAssignment` have undefined behavior (issue #190376). Force each INCLUDE to fully parse its own content, similar to a call stack frame. `ScriptLexer::lex` no longer auto-pops on EOF; the `buffers` and `mbs` members and `getCurrentMB()` are gone, and their users read `curBuf` (e.g. `curBuf.filename`) instead. `readInclude` takes a `function_ref<void()>` callback, and the four call sites (top-level, SECTIONS, output section, MEMORY) pass a context-appropriate parser. With this, each buffer contains complete parser structures by construction, so the `[oldS, curTok)` pointer range in `readAssignment` no longer needs a guard.
This commit is contained in:
@@ -52,14 +52,13 @@ ScriptLexer::Buffer::Buffer(Ctx &ctx, MemoryBufferRef mb)
|
||||
}
|
||||
|
||||
ScriptLexer::ScriptLexer(Ctx &ctx, MemoryBufferRef mb)
|
||||
: ctx(ctx), curBuf(ctx, mb), mbs(1, mb) {
|
||||
: ctx(ctx), curBuf(ctx, mb) {
|
||||
activeFilenames.insert(mb.getBufferIdentifier());
|
||||
}
|
||||
|
||||
// Returns a whole line containing the current token.
|
||||
StringRef ScriptLexer::getLine() {
|
||||
StringRef s = getCurrentMB().getBuffer();
|
||||
|
||||
StringRef s(curBuf.begin, curBuf.s.end() - curBuf.begin);
|
||||
size_t pos = s.rfind('\n', prevTok.data() - s.data());
|
||||
if (pos != StringRef::npos)
|
||||
s = s.substr(pos + 1);
|
||||
@@ -72,8 +71,7 @@ size_t ScriptLexer::getColumnNumber() {
|
||||
}
|
||||
|
||||
std::string ScriptLexer::getCurrentLocation() {
|
||||
std::string filename = std::string(getCurrentMB().getBufferIdentifier());
|
||||
return (filename + ":" + Twine(prevTokLine)).str();
|
||||
return (curBuf.filename + ":" + Twine(prevTokLine)).str();
|
||||
}
|
||||
|
||||
// We don't want to record cascading errors. Keep only the first one.
|
||||
@@ -93,15 +91,10 @@ void ScriptLexer::lex() {
|
||||
StringRef &s = curBuf.s;
|
||||
s = skipSpace(s);
|
||||
if (s.empty()) {
|
||||
// If this buffer is from an INCLUDE command, switch to the "return
|
||||
// value"; otherwise, mark EOF.
|
||||
if (buffers.empty()) {
|
||||
eof = true;
|
||||
return;
|
||||
}
|
||||
activeFilenames.erase(curBuf.filename);
|
||||
curBuf = buffers.pop_back_val();
|
||||
continue;
|
||||
// If this buffer is from an INCLUDE, the caller is responsible for
|
||||
// popping to the parent buffer.
|
||||
eof = true;
|
||||
return;
|
||||
}
|
||||
curTokState = lexState;
|
||||
|
||||
@@ -275,17 +268,3 @@ ScriptLexer::Token ScriptLexer::till(StringRef tok) {
|
||||
setError("unexpected EOF");
|
||||
return {};
|
||||
}
|
||||
|
||||
// Returns true if S encloses T.
|
||||
static bool encloses(StringRef s, StringRef t) {
|
||||
return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
|
||||
}
|
||||
|
||||
MemoryBufferRef ScriptLexer::getCurrentMB() {
|
||||
// Find input buffer containing the current token.
|
||||
assert(!mbs.empty());
|
||||
for (MemoryBufferRef mb : mbs)
|
||||
if (encloses(mb.getBuffer(), curBuf.s))
|
||||
return mb;
|
||||
llvm_unreachable("getCurrentMB: failed to find a token");
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/MemoryBufferRef.h"
|
||||
#include <vector>
|
||||
|
||||
namespace lld::elf {
|
||||
struct Ctx;
|
||||
@@ -34,9 +33,9 @@ protected:
|
||||
Buffer(Ctx &ctx, MemoryBufferRef mb);
|
||||
};
|
||||
Ctx &ctx;
|
||||
// The current buffer and parent buffers due to INCLUDE.
|
||||
// The currently lexed buffer. INCLUDE runs a nested parse on a new `Buffer`,
|
||||
// similar to a call stack frame.
|
||||
Buffer curBuf;
|
||||
SmallVector<Buffer, 0> buffers;
|
||||
|
||||
// Used to detect INCLUDE() cycles.
|
||||
llvm::DenseSet<StringRef> activeFilenames;
|
||||
@@ -80,9 +79,6 @@ public:
|
||||
void expect(StringRef expect);
|
||||
Token till(StringRef tok);
|
||||
std::string getCurrentLocation();
|
||||
MemoryBufferRef getCurrentMB();
|
||||
|
||||
std::vector<MemoryBufferRef> mbs;
|
||||
|
||||
private:
|
||||
StringRef getLine();
|
||||
|
||||
@@ -59,7 +59,7 @@ private:
|
||||
void readEntry();
|
||||
void readExtern();
|
||||
void readGroup();
|
||||
void readInclude();
|
||||
void readInclude(llvm::function_ref<void()> parse);
|
||||
void readInput();
|
||||
void readLinkerScriptStmt(StringRef tok);
|
||||
void readMemory();
|
||||
@@ -74,6 +74,7 @@ private:
|
||||
void readSections();
|
||||
void readSectionsStmt(SmallVectorImpl<SectionCommand *> &v, StringRef tok);
|
||||
void readOutputSectionStmt(OutputSection &osec, StringRef tok);
|
||||
void readStmts(llvm::function_ref<void(StringRef)> readStmt);
|
||||
void readTarget();
|
||||
void readVersion();
|
||||
void readVersionScriptCommand();
|
||||
@@ -239,12 +240,7 @@ void ScriptParser::readVersion() {
|
||||
}
|
||||
|
||||
void ScriptParser::readLinkerScript() {
|
||||
while (!atEOF()) {
|
||||
StringRef tok = next();
|
||||
if (atEOF())
|
||||
break;
|
||||
readLinkerScriptStmt(tok);
|
||||
}
|
||||
readStmts([&](StringRef t) { readLinkerScriptStmt(t); });
|
||||
}
|
||||
|
||||
void ScriptParser::readLinkerScriptStmt(StringRef tok) {
|
||||
@@ -258,7 +254,8 @@ void ScriptParser::readLinkerScriptStmt(StringRef tok) {
|
||||
} else if (tok == "GROUP") {
|
||||
readGroup();
|
||||
} else if (tok == "INCLUDE") {
|
||||
readInclude();
|
||||
readInclude(
|
||||
[&] { readStmts([&](StringRef t) { readLinkerScriptStmt(t); }); });
|
||||
} else if (tok == "INPUT") {
|
||||
readInput();
|
||||
} else if (tok == "MEMORY") {
|
||||
@@ -303,8 +300,7 @@ void ScriptParser::readDefsym() {
|
||||
Expr e = readExpr();
|
||||
if (!atEOF())
|
||||
setError("EOF expected, but got " + next());
|
||||
auto *cmd = make<SymbolAssignment>(
|
||||
name, e, 0, getCurrentMB().getBufferIdentifier().str());
|
||||
auto *cmd = make<SymbolAssignment>(name, e, 0, curBuf.filename.str());
|
||||
ctx.script->sectionCommands.push_back(cmd);
|
||||
}
|
||||
|
||||
@@ -346,8 +342,7 @@ void ScriptParser::addFile(StringRef s) {
|
||||
ctx.driver.addLibrary(s.substr(2));
|
||||
} else {
|
||||
// Case 4: s is a relative path. Search in the directory of the script file.
|
||||
std::string filename = std::string(getCurrentMB().getBufferIdentifier());
|
||||
StringRef directory = sys::path::parent_path(filename);
|
||||
StringRef directory = sys::path::parent_path(curBuf.filename);
|
||||
if (!directory.empty()) {
|
||||
SmallString<0> path(directory);
|
||||
sys::path::append(path, s);
|
||||
@@ -400,22 +395,41 @@ void ScriptParser::readGroup() {
|
||||
++ctx.driver.nextGroupId;
|
||||
}
|
||||
|
||||
void ScriptParser::readInclude() {
|
||||
void ScriptParser::readInclude(llvm::function_ref<void()> parse) {
|
||||
StringRef name = readName();
|
||||
if (!activeFilenames.insert(name).second) {
|
||||
setError("there is a cycle in linker script INCLUDEs");
|
||||
return;
|
||||
}
|
||||
|
||||
if (std::optional<std::string> path = searchScript(ctx, name)) {
|
||||
if (std::optional<MemoryBufferRef> mb = readFile(ctx, *path)) {
|
||||
buffers.push_back(curBuf);
|
||||
curBuf = Buffer(ctx, *mb);
|
||||
mbs.push_back(*mb);
|
||||
}
|
||||
std::optional<std::string> path = searchScript(ctx, name);
|
||||
if (!path) {
|
||||
setError("cannot find linker script " + name);
|
||||
return;
|
||||
}
|
||||
setError("cannot find linker script " + name);
|
||||
std::optional<MemoryBufferRef> mb = readFile(ctx, *path);
|
||||
if (!mb)
|
||||
return;
|
||||
|
||||
SaveAndRestore savedBuf(curBuf, Buffer(ctx, *mb));
|
||||
SaveAndRestore savedPrevTok(prevTok, StringRef());
|
||||
SaveAndRestore savedPrevTokLine(prevTokLine, size_t(1));
|
||||
parse();
|
||||
|
||||
// parse() leaves `eof` true on normal completion; reset so the parent
|
||||
// buffer continues to be lexed.
|
||||
eof = false;
|
||||
activeFilenames.erase(name);
|
||||
}
|
||||
|
||||
// Drive `readStmt` on each token until EOF of the current buffer.
// `readStmt` is the per-statement callback: it is invoked once per loop
// iteration with the token just returned by next(). The callback is only
// borrowed for the duration of this call (it is passed as a function_ref).
|
||||
void ScriptParser::readStmts(llvm::function_ref<void(StringRef)> readStmt) {
|
||||
while (!atEOF()) {
|
||||
StringRef tok = next();
|
||||
// Re-check after next(): if atEOF() is now true, `tok` is not dispatched
// and the function returns without calling `readStmt` for it.
if (atEOF())
|
||||
return;
|
||||
readStmt(tok);
|
||||
}
|
||||
}
|
||||
|
||||
void ScriptParser::readInput() {
|
||||
@@ -707,7 +721,8 @@ void ScriptParser::readSectionsStmt(SmallVectorImpl<SectionCommand *> &v,
|
||||
return;
|
||||
}
|
||||
if (tok == "INCLUDE") {
|
||||
readInclude();
|
||||
readInclude(
|
||||
[&] { readStmts([&](StringRef t) { readSectionsStmt(v, t); }); });
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1097,7 +1112,9 @@ void ScriptParser::readOutputSectionStmt(OutputSection &osec, StringRef tok) {
|
||||
} else if (tok == "SORT") {
|
||||
readSort();
|
||||
} else if (tok == "INCLUDE") {
|
||||
readInclude();
|
||||
readInclude([&] {
|
||||
readStmts([&](StringRef t) { readOutputSectionStmt(osec, t); });
|
||||
});
|
||||
} else if (tok == "(" || tok == ")") {
|
||||
setError("expected filename pattern");
|
||||
} else if (peek() == "(") {
|
||||
@@ -1856,7 +1873,7 @@ void ScriptParser::readMemory() {
|
||||
|
||||
void ScriptParser::readMemoryStmt(StringRef tok) {
|
||||
if (tok == "INCLUDE") {
|
||||
readInclude();
|
||||
readInclude([&] { readStmts([&](StringRef t) { readMemoryStmt(t); }); });
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
18
lld/test/ELF/linkerscript/include-mid-construct.s
Normal file
18
lld/test/ELF/linkerscript/include-mid-construct.s
Normal file
@@ -0,0 +1,18 @@
|
||||
# REQUIRES: x86
|
||||
# RUN: rm -rf %t && split-file %s %t && cd %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
|
||||
|
||||
## A stray ';' in the parent after INCLUDE cannot complete the inner assignment.
|
||||
# RUN: not ld.lld a.o -T top.lds 2>&1 | FileCheck %s --check-prefix=TOP
|
||||
# TOP: error: inc-top.lds:1: unexpected EOF
|
||||
|
||||
#--- top.lds
|
||||
INCLUDE "inc-top.lds";
|
||||
|
||||
#--- inc-top.lds
|
||||
foo = 1
|
||||
|
||||
#--- a.s
|
||||
.globl _start
|
||||
_start:
|
||||
ret
|
||||
@@ -17,6 +17,17 @@
|
||||
# EMPTY: LOAD {{.*}} 0x0000000000001000 0x0000000000001000 {{.*}} R E
|
||||
# EMPTY-NEXT: LOAD {{.*}} 0x0000000000002000 0x0000000000002000 {{.*}} RW
|
||||
|
||||
## A region declaration truncated mid-expression cannot be completed by the
|
||||
## parent MEMORY { ... }.
|
||||
# RUN: cp trunc.lds inc.lds
|
||||
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=TRUNC
|
||||
# TRUNC: error: inc.lds:1: unexpected EOF
|
||||
|
||||
## A stray '}' in the include cannot close the parent MEMORY { ... }.
|
||||
# RUN: cp brace.lds inc.lds
|
||||
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=BRACE
|
||||
# BRACE: error: inc.lds:1: unexpected EOF
|
||||
|
||||
#--- a.s
|
||||
.section .text,"ax"
|
||||
.global _start
|
||||
@@ -54,3 +65,9 @@ SECTIONS {
|
||||
}
|
||||
|
||||
#--- inc-empty.lds
|
||||
|
||||
#--- trunc.lds
|
||||
RAM3 : ORIGIN = 0x4000, LENGTH
|
||||
|
||||
#--- brace.lds
|
||||
}
|
||||
|
||||
@@ -16,6 +16,12 @@
|
||||
# RUN: llvm-objdump --section-headers a.out | FileCheck %s --check-prefix=CHECK2
|
||||
# CHECK2: .data 00000010 0000000000002000 DATA
|
||||
|
||||
## A BYTE() with an unclosed paren in the include cannot be completed by the
|
||||
## parent output-section body.
|
||||
# RUN: cp trunc.lds inc.lds
|
||||
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=TRUNC
|
||||
# TRUNC: error: inc.lds:1: unexpected EOF
|
||||
|
||||
#--- a.s
|
||||
.section .text,"ax"
|
||||
.global _start
|
||||
@@ -42,3 +48,6 @@ SECTIONS {
|
||||
|
||||
#--- full.lds
|
||||
QUAD(0)
|
||||
|
||||
#--- trunc.lds
|
||||
BYTE(42
|
||||
|
||||
@@ -19,6 +19,17 @@
|
||||
# CHECK2-NEXT: .data2 00000008 0000000000002008 DATA
|
||||
# CHECK2-NEXT: .data3 00000008 0000000000002010 DATA
|
||||
|
||||
## An unclosed output section in the include cannot be closed by the outer
|
||||
## SECTIONS { ... } '}'.
|
||||
# RUN: cp trunc.lds inc.lds
|
||||
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=TRUNC
|
||||
# TRUNC: error: inc.lds:1: unexpected EOF
|
||||
|
||||
## A stray '}' in the include cannot close the parent SECTIONS { ... }.
|
||||
# RUN: cp brace.lds inc.lds
|
||||
# RUN: not ld.lld -T a.lds a.o 2>&1 | FileCheck %s --check-prefix=BRACE
|
||||
# BRACE: error: inc.lds:1: unexpected EOF
|
||||
|
||||
#--- a.s
|
||||
.global _start
|
||||
_start: nop
|
||||
@@ -43,3 +54,9 @@ SECTIONS {
|
||||
|
||||
#--- full.lds
|
||||
.data2 : { QUAD(0) } > RAM
|
||||
|
||||
#--- trunc.lds
|
||||
.text : { *(.text*)
|
||||
|
||||
#--- brace.lds
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user