The patch reapplies https://github.com/llvm/llvm-project/pull/173130. This patch implements the following papers: [P1857R3 Modules Dependency Discovery](https://wg21.link/p1857r3). [P3034R1 Module Declarations Shouldn't be Macros](https://wg21.link/P3034R1). [CWG2947](https://cplusplus.github.io/CWG/issues/2947.html). At the start of phase 4, an import or module token is treated as starting a directive and is converted to its respective keyword iff: - after skipping horizontal whitespace it is - at the start of a logical line, or - preceded by an export at the start of the logical line, - and is followed by an identifier pp token (before macro expansion), or - <, ", or : (but not ::) pp tokens for import, or - ; for module. Otherwise the token is treated as an identifier. Additionally: - The entire import or module directive (including the closing ;) must be on a single logical line, and for module must not come from an #include. - The expansion of macros must not result in an import or module directive introducer that was not there prior to macro expansion. - A module directive may only appear as the first preprocessing tokens in a file (excluding the global module fragment). - Preprocessor conditionals shall not span a module declaration. After this patch, we handle C++ module-import and module-declaration as a real pp-directive in the preprocessor. Additionally, we refactor module name lexing, removing the complex state machine and reading the full module name during module/import directive handling. Possibly we can introduce a tok::annot_module_name token in the future to avoid parsing the module name twice, in both the preprocessor and the parser, but that makes error recovery much more difficult (e.g. import a; import b; on the same line). This patch also introduces 2 new keywords, `__preprocessed_module` and `__preprocessed_import`. These 2 keywords are generated in `-E` mode.
This is useful to avoid confusion with the `module` and `import` keywords in preprocessed output: ```cpp export module m; struct import {}; #define EMPTY EMPTY import foo; ``` Fixes https://github.com/llvm/llvm-project/issues/54047 The previous patch had a use-after-free issue in the Lexer::LexTokenInternal function. Since C++20, the `export`, `import` and `module` identifiers may be introducers of a C++ module declaration/import directive, and the directive will be handled in `LexIdentifierContinue`. Unfortunately, EOF may be encountered in `LexIdentifierContinue` and `CurLexer` might be destructed in `HandleEndOfFile`; if the code after `LexIdentifierContinue` tries to access `LangOpts` or other class members of this Lexer, it will hit undefined behavior. This patch also fixes the use-after-free issue in the Lexer by introducing a mechanism to delay the destruction of `CurLexer` in the `Preprocessor` class. --------- Signed-off-by: yronglin <yronglin777@gmail.com>
89 lines
2.2 KiB
C++
89 lines
2.2 KiB
C++
//===--- TokenKinds.cpp - Token Kinds Support -----------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the TokenKind enum and support functions.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "clang/Basic/TokenKinds.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
using namespace clang;
|
|
|
|
// Table of internal token-kind names, indexed by TokenKind.  One entry is
// generated per TOK/KEYWORD entry in TokenKinds.def, in declaration order,
// so the array parallels the TokenKind enumeration exactly.  The trailing
// nullptr entry corresponds to tok::NUM_TOKENS.
static const char * const TokNames[] = {
#define TOK(X) #X,
#define KEYWORD(X,Y) #X,
#include "clang/Basic/TokenKinds.def"
nullptr
};
|
|
|
|
/// Return the internal name of the token kind \p Kind (e.g. "identifier",
/// the stringized enumerator), looked up in the TokNames table generated
/// from TokenKinds.def.  An out-of-range kind is a programming error.
const char *tok::getTokenName(TokenKind Kind) {
  if (Kind >= tok::NUM_TOKENS) {
    llvm_unreachable("unknown TokenKind");
    return nullptr;
  }
  // TokNames parallels the TokenKind enumeration, so direct indexing is valid.
  return TokNames[Kind];
}
|
|
|
|
/// Return the source spelling of the punctuator token kind \p Kind
/// (the Y string from the PUNCTUATOR entries in TokenKinds.def), or
/// nullptr if \p Kind is not a punctuator.
const char *tok::getPunctuatorSpelling(TokenKind Kind) {
  switch (Kind) {
  // Expands to one case per punctuator defined in TokenKinds.def.
#define PUNCTUATOR(X,Y) case X: return Y;
#include "clang/Basic/TokenKinds.def"
  default: break;
  }
  return nullptr;
}
|
|
|
|
/// Return the source spelling of the keyword token kind \p Kind
/// (e.g. the X text of a kw_X enumerator), or nullptr if \p Kind
/// is not a keyword.
const char *tok::getKeywordSpelling(TokenKind Kind) {
  switch (Kind) {
  // Expands to one case per KEYWORD entry in TokenKinds.def; the spelling
  // is the stringized keyword name itself.
#define KEYWORD(X,Y) case kw_ ## X: return #X;
#include "clang/Basic/TokenKinds.def"
  default: break;
  }
  return nullptr;
}
|
|
|
|
/// Return the full spelling of the Objective-C @-keyword \p Kind, including
/// the leading '@' (e.g. "@interface" for objc_interface), or nullptr if
/// \p Kind is not an Objective-C at-keyword.
const char *tok::getObjCKeywordSpelling(ObjCKeywordKind Kind) {
  switch (Kind) {
  // Expands to one case per OBJC_AT_KEYWORD entry in TokenKinds.def; the
  // '@' prefix is prepended via string-literal concatenation.
#define OBJC_AT_KEYWORD(X) \
  case objc_##X: \
    return "@" #X;
#include "clang/Basic/TokenKinds.def"
  default:
    break;
  }
  return nullptr;
}
|
|
|
|
/// Return the spelling of the preprocessor directive keyword \p Kind
/// (e.g. "define" for pp_define), or nullptr if \p Kind has no spelling
/// (i.e. it is not one of the PPKEYWORD entries).
const char *tok::getPPKeywordSpelling(tok::PPKeywordKind Kind) {
  switch (Kind) {
  // Expands to one case per PPKEYWORD entry in TokenKinds.def.
#define PPKEYWORD(x) case tok::pp_##x: return #x;
#include "clang/Basic/TokenKinds.def"
  default: break;
  }
  return nullptr;
}
|
|
|
|
/// Return true if \p Kind is an annotation token kind, i.e. one of the
/// annot_* kinds declared via ANNOTATION in TokenKinds.def.
bool tok::isAnnotation(TokenKind Kind) {
  switch (Kind) {
  // Expands to one "return true" case per ANNOTATION entry.
#define ANNOTATION(X) case annot_ ## X: return true;
#include "clang/Basic/TokenKinds.def"
  default:
    break;
  }
  return false;
}
|
|
|
|
/// Return true if \p Kind is an annotation token kind produced for a pragma,
/// i.e. one of the annot_* kinds declared via PRAGMA_ANNOTATION in
/// TokenKinds.def (a subset of the kinds accepted by isAnnotation).
bool tok::isPragmaAnnotation(TokenKind Kind) {
  switch (Kind) {
  // Expands to one "return true" case per PRAGMA_ANNOTATION entry.
#define PRAGMA_ANNOTATION(X) case annot_ ## X: return true;
#include "clang/Basic/TokenKinds.def"
  default:
    break;
  }
  return false;
}
|