Files
llvm-project/lld/ELF/ScriptLexer.h
Fangrui Song 2855525c4a [ELF] Handle INCLUDE like a call stack (#193427)
Previously, the lexer maintained a stack of buffers, which allowed a construct
started in an INCLUDE'd file to be closed by the parent. This caused
malformed scripts to be spuriously accepted (e.g. a bare assignment with
no trailing `;` in the include, terminated by the parent's `;` after
`INCLUDE`) and undefined-behavior span computations in
`readAssignment`'s `commandString` (issue #190376).

Force each INCLUDE to fully parse its own content, similar to a call
stack frame. `ScriptLexer::lex` no longer auto-pops on EOF; the
`buffers` member is gone. `readInclude` takes a `function_ref<void()>`
callback, and the four call sites (top-level, SECTIONS, output
section, MEMORY) pass a context-appropriate parser.

With this, each buffer contains complete parser structures by
construction, so the `[oldS, curTok)` pointer range in
`readAssignment` no longer needs a guard.
2026-04-22 19:59:00 -07:00

91 lines
2.3 KiB
C++

//===- ScriptLexer.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SCRIPT_LEXER_H
#define LLD_ELF_SCRIPT_LEXER_H
#include "lld/Common/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBufferRef.h"
namespace lld::elf {
struct Ctx;
// Tokenizer for script input. It holds the buffer currently being lexed, the
// most recent tokens, and the lexing state, and declares the primitive token
// operations (peek/next/consume/expect, ...) whose definitions live outside
// this header.
// NOTE(review): the state below is `protected`, presumably so that a derived
// parser class can read and update it directly — confirm against the users of
// this class.
class ScriptLexer {
protected:
// One input buffer being lexed, together with its position bookkeeping.
struct Buffer {
// The remaining content to parse and the filename.
StringRef s, filename;
// NOTE(review): presumably points at the start of the buffer's contents so
// that positions can be computed for diagnostics — confirm in the
// implementation file.
const char *begin = nullptr;
// Line counter; starts at 1.
size_t lineNumber = 1;
// True if the script is opened as an absolute path under the --sysroot
// directory.
bool isUnderSysroot = false;
Buffer() = default;
// Builds a Buffer from a memory buffer reference. Defined out of line.
Buffer(Ctx &ctx, MemoryBufferRef mb);
};
// The context object this lexer was created with (see the constructor).
Ctx &ctx;
// The currently lexed buffer. INCLUDE runs a nested parse on a new `Buffer`,
// similar to a call stack frame.
Buffer curBuf;
// Used to detect INCLUDE() cycles.
llvm::DenseSet<StringRef> activeFilenames;
// Lexing modes. The rules for what constitutes a token differ between modes
// (see the note on curTok below).
enum class State {
Script,
Expr,
// Used by version node and dynamic list parsing.
VersionNode,
};
// Thin wrapper around a StringRef token that can be tested in a boolean
// context.
struct Token {
StringRef str;
// True if and only if the token text is non-empty.
explicit operator bool() const { return !str.empty(); }
// Implicit conversion so that a Token can be used where a StringRef is
// expected.
operator StringRef() const { return str; }
};
// The token before the last next().
StringRef prevTok;
// Rules for what is a token are different when we are in an expression.
// curTok holds the cached return value of peek() and is invalid when the
// expression state changes.
StringRef curTok;
// NOTE(review): presumably the line number associated with prevTok — confirm.
size_t prevTokLine = 1;
// The lex state when curTok is cached.
State curTokState = State::Script;
// The lex state currently in effect; initially State::Script.
State lexState = State::Script;
// NOTE(review): presumably set once the current buffer has been exhausted —
// confirm in the implementation of lex()/atEOF().
bool eof = false;
public:
// Creates a lexer over `mb`. Defined out of line.
explicit ScriptLexer(Ctx &ctx, MemoryBufferRef mb);
// Reports an error described by `msg`.
// NOTE(review): whether location information is attached and whether lexing
// stops afterwards is decided by the out-of-line definition — confirm.
void setError(const Twine &msg);
// Token-level primitives. Only the declarations are visible here; the notes
// below are inferred from names and signatures and should be confirmed
// against the definitions.
// NOTE(review): presumably lexes the next token from curBuf into curTok.
void lex();
// NOTE(review): presumably returns `s` with leading whitespace (and possibly
// comments) removed.
StringRef skipSpace(StringRef s);
// NOTE(review): presumably reports whether the end of input has been reached.
bool atEOF();
// Advances to the next token (prevTok records the token before the last
// call). NOTE(review): presumably returns the token that was consumed.
StringRef next();
// Returns the upcoming token; its value is cached in curTok.
StringRef peek();
// NOTE(review): presumably discards the upcoming token.
void skip();
// NOTE(review): presumably consumes the upcoming token and returns true if
// it equals `tok`, and otherwise returns false without consuming it.
bool consume(StringRef tok);
// NOTE(review): presumably consumes the upcoming token and reports an error
// if it is not `expect`.
void expect(StringRef expect);
// NOTE(review): presumably yields tokens until `tok` is reached, returning
// an empty (false) Token at that point so that it can drive a loop.
Token till(StringRef tok);
// NOTE(review): presumably formats the current position (file name and line)
// for use in diagnostics.
std::string getCurrentLocation();
private:
// Helpers for location reporting.
// NOTE(review): presumably return the text of the current line and the
// column within it, respectively — confirm.
StringRef getLine();
size_t getColumnNumber();
};
} // namespace lld::elf
#endif