Previously, the lexer maintained a stack of buffers, which allowed a construct started in an INCLUDE'd file to be closed by the parent file. This led to spurious acceptance of malformed scripts (e.g. a bare assignment with no trailing `;` in the included file, terminated by the parent's `;` after `INCLUDE`) and to undefined-behavior span computations for `commandString` in `readAssignment` (issue #190376). This change forces each INCLUDE to fully parse its own content, similar to a call stack frame: `ScriptLexer::lex` no longer auto-pops on EOF, and the `buffers` member is gone. `readInclude` now takes a `function_ref<void()>` callback, and the four call sites (top-level, SECTIONS, output section, MEMORY) each pass a context-appropriate parser. As a result, each buffer contains complete parser structures by construction, so the `[oldS, curTok)` pointer range in `readAssignment` no longer needs a guard.
91 lines
2.3 KiB
C++
91 lines
2.3 KiB
C++
//===- ScriptLexer.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLD_ELF_SCRIPT_LEXER_H
|
|
#define LLD_ELF_SCRIPT_LEXER_H
|
|
|
|
#include "lld/Common/LLVM.h"
|
|
#include "llvm/ADT/DenseSet.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/MemoryBufferRef.h"
|
|
|
|
namespace lld::elf {
|
|
struct Ctx;
|
|
|
|
class ScriptLexer {
|
|
protected:
|
|
struct Buffer {
|
|
// The remaining content to parse and the filename.
|
|
StringRef s, filename;
|
|
const char *begin = nullptr;
|
|
size_t lineNumber = 1;
|
|
// True if the script is opened as an absolute path under the --sysroot
|
|
// directory.
|
|
bool isUnderSysroot = false;
|
|
|
|
Buffer() = default;
|
|
Buffer(Ctx &ctx, MemoryBufferRef mb);
|
|
};
|
|
Ctx &ctx;
|
|
// The currently lexed buffer. INCLUDE runs a nested parse on a new `Buffer`,
|
|
// similar to a call stack frame.
|
|
Buffer curBuf;
|
|
|
|
// Used to detect INCLUDE() cycles.
|
|
llvm::DenseSet<StringRef> activeFilenames;
|
|
|
|
enum class State {
|
|
Script,
|
|
Expr,
|
|
// Used by version node and dynamic list parsing.
|
|
VersionNode,
|
|
};
|
|
|
|
struct Token {
|
|
StringRef str;
|
|
explicit operator bool() const { return !str.empty(); }
|
|
operator StringRef() const { return str; }
|
|
};
|
|
|
|
// The token before the last next().
|
|
StringRef prevTok;
|
|
// Rules for what is a token are different when we are in an expression.
|
|
// curTok holds the cached return value of peek() and is invalid when the
|
|
// expression state changes.
|
|
StringRef curTok;
|
|
size_t prevTokLine = 1;
|
|
// The lex state when curTok is cached.
|
|
State curTokState = State::Script;
|
|
State lexState = State::Script;
|
|
bool eof = false;
|
|
|
|
public:
|
|
explicit ScriptLexer(Ctx &ctx, MemoryBufferRef mb);
|
|
|
|
void setError(const Twine &msg);
|
|
void lex();
|
|
StringRef skipSpace(StringRef s);
|
|
bool atEOF();
|
|
StringRef next();
|
|
StringRef peek();
|
|
void skip();
|
|
bool consume(StringRef tok);
|
|
void expect(StringRef expect);
|
|
Token till(StringRef tok);
|
|
std::string getCurrentLocation();
|
|
|
|
private:
|
|
StringRef getLine();
|
|
size_t getColumnNumber();
|
|
};
|
|
|
|
} // namespace lld::elf
|
|
|
|
#endif
|