This adds support for the following kinds of formats:
* Hexadecimal literals like 0x1.fp13
* Special values +inf/-inf, +qnan/-qnan
* NaN values with payloads like +nan(0x1)

Additionally, the floating-point hexadecimal format that records the bit pattern exactly no longer requires the 0xL, 0xK, or similar code for the floating-point type. The current hexadecimal syntax is retained for the moment, but it is expected to be ripped out after the next release of LLVM. These changes were discussed in an RFC at https://discourse.llvm.org/t/rfc-floating-point-literals-in-llvm-ir/82974.
1416 lines
41 KiB
C++
1416 lines
41 KiB
C++
//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Implement the Lexer for .ll files.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/AsmParser/LLLexer.h"
|
|
#include "llvm/ADT/APInt.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
#include "llvm/IR/Instruction.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
#include <cassert>
|
|
#include <cctype>
|
|
#include <cstdio>
|
|
|
|
using namespace llvm;
|
|
|
|
// Both the lexer and parser can issue error messages. If the lexer issues a
|
|
// lexer error, since we do not terminate execution immediately, usually that
|
|
// is followed by the parser issuing a parser error. However, the error issued
|
|
// by the lexer is more relevant in that case as opposed to potentially more
|
|
// generic parser error. So instead of always recording the last error message
|
|
// use the `Priority` to establish a priority, with Lexer > Parser > None. We
|
|
// record the issued message only if the message has same or higher priority
|
|
// than the existing one. This prevents lexer errors from being overwritten by
|
|
// parser errors.
|
|
void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg,
|
|
LLLexer::ErrorPriority Priority) {
|
|
if (Priority < ErrorInfo.Priority)
|
|
return;
|
|
ErrorInfo.Error = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
|
|
ErrorInfo.Priority = Priority;
|
|
}
|
|
|
|
/// Print \p Msg through the source manager as a warning diagnostic attached
/// to \p WarningLoc.
void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
  SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Helper functions.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// atoull - Convert an ascii string of decimal digits into the unsigned long
|
|
// long representation... this does not have to do input error checking,
|
|
// because we know that the input will be matched by a suitable regex...
|
|
//
|
|
uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
|
|
uint64_t Result = 0;
|
|
for (; Buffer != End; Buffer++) {
|
|
uint64_t OldRes = Result;
|
|
Result *= 10;
|
|
Result += *Buffer-'0';
|
|
if (Result < OldRes) { // overflow detected.
|
|
LexError("constant bigger than 64 bits detected");
|
|
return 0;
|
|
}
|
|
}
|
|
return Result;
|
|
}
|
|
|
|
uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
|
|
uint64_t Result = 0;
|
|
for (; Buffer != End; ++Buffer) {
|
|
uint64_t OldRes = Result;
|
|
Result *= 16;
|
|
Result += hexDigitValue(*Buffer);
|
|
|
|
if (Result < OldRes) { // overflow detected.
|
|
LexError("constant bigger than 64 bits detected");
|
|
return 0;
|
|
}
|
|
}
|
|
return Result;
|
|
}
|
|
|
|
void LLLexer::HexToIntPair(const char *Buffer, const char *End,
|
|
uint64_t Pair[2]) {
|
|
Pair[0] = 0;
|
|
if (End - Buffer >= 16) {
|
|
for (int i = 0; i < 16; i++, Buffer++) {
|
|
assert(Buffer != End);
|
|
Pair[0] *= 16;
|
|
Pair[0] += hexDigitValue(*Buffer);
|
|
}
|
|
}
|
|
Pair[1] = 0;
|
|
for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
|
|
Pair[1] *= 16;
|
|
Pair[1] += hexDigitValue(*Buffer);
|
|
}
|
|
if (Buffer != End)
|
|
LexError("constant bigger than 128 bits detected");
|
|
}
|
|
|
|
/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
|
|
/// { low64, high16 } as usual for an APInt.
|
|
void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
|
|
uint64_t Pair[2]) {
|
|
Pair[1] = 0;
|
|
for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
|
|
assert(Buffer != End);
|
|
Pair[1] *= 16;
|
|
Pair[1] += hexDigitValue(*Buffer);
|
|
}
|
|
Pair[0] = 0;
|
|
for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
|
|
Pair[0] *= 16;
|
|
Pair[0] += hexDigitValue(*Buffer);
|
|
}
|
|
if (Buffer != End)
|
|
LexError("constant bigger than 128 bits detected");
|
|
}
|
|
|
|
// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
|
|
// appropriate character.
|
|
static void UnEscapeLexed(std::string &Str) {
|
|
if (Str.empty()) return;
|
|
|
|
char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
|
|
char *BOut = Buffer;
|
|
for (char *BIn = Buffer; BIn != EndBuffer; ) {
|
|
if (BIn[0] == '\\') {
|
|
if (BIn < EndBuffer-1 && BIn[1] == '\\') {
|
|
*BOut++ = '\\'; // Two \ becomes one
|
|
BIn += 2;
|
|
} else if (BIn < EndBuffer-2 &&
|
|
isxdigit(static_cast<unsigned char>(BIn[1])) &&
|
|
isxdigit(static_cast<unsigned char>(BIn[2]))) {
|
|
*BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
|
|
BIn += 3; // Skip over handled chars
|
|
++BOut;
|
|
} else {
|
|
*BOut++ = *BIn++;
|
|
}
|
|
} else {
|
|
*BOut++ = *BIn++;
|
|
}
|
|
}
|
|
Str.resize(BOut-Buffer);
|
|
}
|
|
|
|
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
|
|
|
|
/// isLabelTail - Return true if this pointer points to a valid end of a label.
|
|
static const char *isLabelTail(const char *CurPtr) {
|
|
while (true) {
|
|
if (CurPtr[0] == ':') return CurPtr+1;
|
|
if (!isLabelChar(CurPtr[0])) return nullptr;
|
|
++CurPtr;
|
|
}
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Lexer definition.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Construct a lexer over \p StartBuf. Lexing begins at the first character
/// of the buffer.
LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
                 LLVMContext &C)
    : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) {
  CurPtr = CurBuf.begin();
}
|
|
|
|
int LLLexer::getNextChar() {
|
|
char CurChar = *CurPtr++;
|
|
switch (CurChar) {
|
|
default: return (unsigned char)CurChar;
|
|
case 0:
|
|
// A nul character in the stream is either the end of the current buffer or
|
|
// a random nul in the file. Disambiguate that here.
|
|
if (CurPtr-1 != CurBuf.end())
|
|
return 0; // Just whitespace.
|
|
|
|
// Otherwise, return end of file.
|
|
--CurPtr; // Another call to lex will return EOF again.
|
|
return EOF;
|
|
}
|
|
}
|
|
|
|
/// Lex the next token, skipping whitespace and comments, and return its kind.
/// The token is selected by its first character; TokStart is left pointing at
/// that character.
lltok::Kind LLLexer::LexToken() {
  // Set token end to next location, since the end is exclusive.
  PrevTokEnd = CurPtr;
  for (;;) {
    TokStart = CurPtr;

    const int CurChar = getNextChar();
    switch (CurChar) {
    case EOF:
      return lltok::Eof;

    // Whitespace (including stray nul characters) is ignored.
    case 0:
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      continue;

    // Comments are skipped; a '/' must start a C-style comment.
    case ';':
      SkipLineComment();
      continue;
    case '/':
      if (getNextChar() != '*' || SkipCComment())
        return lltok::Error;
      continue;

    // Single-character punctuation.
    case ':':
      return lltok::colon;
    case '=':
      return lltok::equal;
    case '[':
      return lltok::lsquare;
    case ']':
      return lltok::rsquare;
    case '{':
      return lltok::lbrace;
    case '}':
      return lltok::rbrace;
    case '<':
      return lltok::less;
    case '>':
      return lltok::greater;
    case '(':
      return lltok::lparen;
    case ')':
      return lltok::rparen;
    case ',':
      return lltok::comma;
    case '*':
      return lltok::star;
    case '|':
      return lltok::bar;

    // Tokens introduced by a distinguishing first character.
    case '+':
      return LexPositive();
    case '@':
      return LexAt();
    case '$':
      return LexDollar();
    case '%':
      return LexPercent();
    case '"':
      return LexQuote();
    case '!':
      return LexExclaim();
    case '^':
      return LexCaret();
    case '#':
      return LexHash();
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '-':
      return LexDigitOrNegative();

    // A '.' starts either a label or the "..." token.
    case '.':
      if (const char *LabelEnd = isLabelTail(CurPtr)) {
        CurPtr = LabelEnd;
        StrVal.assign(TokStart, CurPtr - 1);
        return lltok::LabelStr;
      }
      if (CurPtr[0] == '.' && CurPtr[1] == '.') {
        CurPtr += 2;
        return lltok::dotdotdot;
      }
      return lltok::Error;

    default:
      // Handle letters: [a-zA-Z_]
      if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
        return LexIdentifier();
      return lltok::Error;
    }
  }
}
|
|
|
|
void LLLexer::SkipLineComment() {
|
|
while (true) {
|
|
if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
|
|
return;
|
|
}
|
|
}
|
|
|
|
/// This skips C-style /**/ comments. Returns true if there
|
|
/// was an error.
|
|
bool LLLexer::SkipCComment() {
|
|
while (true) {
|
|
int CurChar = getNextChar();
|
|
switch (CurChar) {
|
|
case EOF:
|
|
LexError("unterminated comment");
|
|
return true;
|
|
case '*':
|
|
// End of the comment?
|
|
CurChar = getNextChar();
|
|
if (CurChar == '/')
|
|
return false;
|
|
if (CurChar == EOF) {
|
|
LexError("unterminated comment");
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Lex all tokens that start with an @ character.
|
|
/// GlobalVar @\"[^\"]*\"
|
|
/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
|
|
/// GlobalVarID @[0-9]+
|
|
lltok::Kind LLLexer::LexAt() {
|
|
return LexVar(lltok::GlobalVar, lltok::GlobalID);
|
|
}
|
|
|
|
/// Lex all tokens that start with a $ character.
///   Label        [-a-zA-Z$._0-9]+:
///   ComdatVar    $\"[^\"]*\"
///   ComdatVar    $[-a-zA-Z$._][-a-zA-Z$._0-9]*
lltok::Kind LLLexer::LexDollar() {
  // A '$' may also be the first character of a label.
  if (const char *LabelEnd = isLabelTail(TokStart)) {
    CurPtr = LabelEnd;
    StrVal.assign(TokStart, CurPtr - 1);
    return lltok::LabelStr;
  }

  // Handle DollarStringConstant: $\"[^\"]*\"
  if (*CurPtr == '"') {
    ++CurPtr;

    // Scan to the closing quote.
    int CurChar;
    do {
      CurChar = getNextChar();
      if (CurChar == EOF) {
        LexError("end of file in COMDAT variable name");
        return lltok::Error;
      }
    } while (CurChar != '"');

    // Drop the leading $" and the trailing quote.
    StrVal.assign(TokStart + 2, CurPtr - 1);
    UnEscapeLexed(StrVal);
    if (StringRef(StrVal).contains(0)) {
      LexError("NUL character is not allowed in names");
      return lltok::Error;
    }
    return lltok::ComdatVar;
  }

  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
  return ReadVarName() ? lltok::ComdatVar : lltok::Error;
}
|
|
|
|
/// ReadString - Read a string until the closing quote.
|
|
lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
|
|
const char *Start = CurPtr;
|
|
while (true) {
|
|
int CurChar = getNextChar();
|
|
|
|
if (CurChar == EOF) {
|
|
LexError("end of file in string constant");
|
|
return lltok::Error;
|
|
}
|
|
if (CurChar == '"') {
|
|
StrVal.assign(Start, CurPtr-1);
|
|
UnEscapeLexed(StrVal);
|
|
return kind;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// ReadVarName - Read the rest of a token containing a variable name.
|
|
bool LLLexer::ReadVarName() {
|
|
const char *NameStart = CurPtr;
|
|
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
|
|
CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
|
CurPtr[0] == '.' || CurPtr[0] == '_') {
|
|
++CurPtr;
|
|
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
|
|
CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
|
CurPtr[0] == '.' || CurPtr[0] == '_')
|
|
++CurPtr;
|
|
|
|
StrVal.assign(NameStart, CurPtr);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is
|
|
// returned, otherwise the Error token is returned.
|
|
lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
|
|
if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
|
|
return lltok::Error;
|
|
|
|
for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
|
|
/*empty*/;
|
|
|
|
uint64_t Val = atoull(TokStart + 1, CurPtr);
|
|
if ((unsigned)Val != Val)
|
|
LexError("invalid value number (too large)");
|
|
UIntVal = unsigned(Val);
|
|
return Token;
|
|
}
|
|
|
|
/// Lex the remainder of a variable token whose one-character sigil has already
/// been consumed. Quoted and plain names yield \p Var with the name in StrVal;
/// numeric IDs yield \p VarID with the number in UIntVal.
lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
  // Handle StringConstant: \"[^\"]*\"
  if (*CurPtr == '"') {
    ++CurPtr;

    // Scan to the closing quote.
    int CurChar;
    do {
      CurChar = getNextChar();
      if (CurChar == EOF) {
        LexError("end of file in global variable name");
        return lltok::Error;
      }
    } while (CurChar != '"');

    // Drop the sigil, the opening quote and the closing quote.
    StrVal.assign(TokStart + 2, CurPtr - 1);
    UnEscapeLexed(StrVal);
    if (StringRef(StrVal).contains(0)) {
      LexError("NUL character is not allowed in names");
      return lltok::Error;
    }
    return Var;
  }

  // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
  if (ReadVarName())
    return Var;

  // Handle VarID: [0-9]+
  return LexUIntID(VarID);
}
|
|
|
|
/// Lex all tokens that start with a % character.
|
|
/// LocalVar ::= %\"[^\"]*\"
|
|
/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
|
|
/// LocalVarID ::= %[0-9]+
|
|
lltok::Kind LLLexer::LexPercent() {
|
|
return LexVar(lltok::LocalVar, lltok::LocalVarID);
|
|
}
|
|
|
|
/// Lex all tokens that start with a " character.
|
|
/// QuoteLabel "[^"]+":
|
|
/// StringConstant "[^"]*"
|
|
lltok::Kind LLLexer::LexQuote() {
|
|
lltok::Kind kind = ReadString(lltok::StringConstant);
|
|
if (kind == lltok::Error || kind == lltok::Eof)
|
|
return kind;
|
|
|
|
if (CurPtr[0] == ':') {
|
|
++CurPtr;
|
|
if (StringRef(StrVal).contains(0)) {
|
|
LexError("NUL character is not allowed in names");
|
|
kind = lltok::Error;
|
|
} else {
|
|
kind = lltok::LabelStr;
|
|
}
|
|
}
|
|
|
|
return kind;
|
|
}
|
|
|
|
/// Lex all tokens that start with a ! character.
|
|
/// !foo
|
|
/// !
|
|
lltok::Kind LLLexer::LexExclaim() {
|
|
// Lex a metadata name as a MetadataVar.
|
|
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
|
|
CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
|
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
|
|
++CurPtr;
|
|
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
|
|
CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
|
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
|
|
++CurPtr;
|
|
|
|
StrVal.assign(TokStart+1, CurPtr); // Skip !
|
|
UnEscapeLexed(StrVal);
|
|
return lltok::MetadataVar;
|
|
}
|
|
return lltok::exclaim;
|
|
}
|
|
|
|
/// Lex all tokens that start with a ^ character.
|
|
/// SummaryID ::= ^[0-9]+
|
|
lltok::Kind LLLexer::LexCaret() {
|
|
// Handle SummaryID: ^[0-9]+
|
|
return LexUIntID(lltok::SummaryID);
|
|
}
|
|
|
|
/// Lex all tokens that start with a # character.
|
|
/// AttrGrpID ::= #[0-9]+
|
|
/// Hash ::= #
|
|
lltok::Kind LLLexer::LexHash() {
|
|
// Handle AttrGrpID: #[0-9]+
|
|
if (isdigit(static_cast<unsigned char>(CurPtr[0])))
|
|
return LexUIntID(lltok::AttrGrpID);
|
|
return lltok::hash;
|
|
}
|
|
|
|
/// Lex a label, integer or byte types, keyword, or hexadecimal integer
/// constant.
///    Label            [-a-zA-Z$._0-9]+:
///    ByteType         b[0-9]+
///    IntegerType      i[0-9]+
///    Keyword          sdiv, float, ...
///    HexIntConstant   [us]0x[0-9A-Fa-f]+
///    HexFloatConstant f0x[0-9A-Fa-f]+
///
/// The first character of the identifier has already been consumed, so it is
/// at CurPtr[-1] (== TokStart[0]). The candidates are tried in this order:
/// label, integer/byte type, exact keyword match, prefix-based debug-info
/// names, hexadecimal constant, and finally "cc<N>". Anything else yields
/// lltok::Error with CurPtr reset to just after the first character.
lltok::Kind LLLexer::LexIdentifier() {
  const char *StartChar = CurPtr;
  const char IntOrByteIdentifier = CurPtr[-1];
  // Only identifiers that begin with 'i' or 'b' can be an integer or byte
  // type. For every other first letter the type candidate is marked as empty
  // straight away by ending it at StartChar.
  const char *IntOrByteEnd =
      (IntOrByteIdentifier == 'i' || IntOrByteIdentifier == 'b') ? nullptr
                                                                 : StartChar;
  const char *KeywordEnd = nullptr;

  for (; isLabelChar(*CurPtr); ++CurPtr) {
    // If we decide this is a byte or an integer, remember the end of the
    // sequence.
    if (!IntOrByteEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
      IntOrByteEnd = CurPtr;
    // A keyword consists of alphanumerics and '_' only; remember where the
    // first other label character appears.
    if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
        *CurPtr != '_')
      KeywordEnd = CurPtr;
  }

  // If we stopped due to a colon, unless we were directed to ignore it,
  // this really is a label.
  if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
    StrVal.assign(StartChar - 1, CurPtr++);
    return lltok::LabelStr;
  }

  // Otherwise, this wasn't a label.  If this was valid as a byte or an integer
  // type, return it.
  if (!IntOrByteEnd)
    IntOrByteEnd = CurPtr;
  if (IntOrByteEnd != StartChar) {
    CurPtr = IntOrByteEnd;
    uint64_t NumBits = atoull(StartChar, CurPtr);
    if (NumBits < IntegerType::MIN_INT_BITS ||
        NumBits > IntegerType::MAX_INT_BITS) {
      LexError("bitwidth for integer or byte type out of range");
      return lltok::Error;
    }
    if (IntOrByteIdentifier == 'i')
      TyVal = IntegerType::get(Context, NumBits);
    else
      TyVal = ByteType::get(Context, NumBits);

    return lltok::Type;
  }

  // Otherwise, this was a letter sequence.  See which keyword this is.
  if (!KeywordEnd)
    KeywordEnd = CurPtr;
  CurPtr = KeywordEnd;
  --StartChar; // Include the first character, which was consumed earlier.
  StringRef Keyword(StartChar, CurPtr - StartChar);

#define KEYWORD(STR)                                                           \
  do {                                                                         \
    if (Keyword == #STR)                                                       \
      return lltok::kw_##STR;                                                  \
  } while (false)

  KEYWORD(true);    KEYWORD(false);
  KEYWORD(declare); KEYWORD(define);
  KEYWORD(global);  KEYWORD(constant);
  KEYWORD(br);

  KEYWORD(dso_local);
  KEYWORD(dso_preemptable);

  KEYWORD(private);
  KEYWORD(internal);
  KEYWORD(available_externally);
  KEYWORD(linkonce);
  KEYWORD(linkonce_odr);
  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
  KEYWORD(weak_odr);
  KEYWORD(appending);
  KEYWORD(dllimport);
  KEYWORD(dllexport);
  KEYWORD(common);
  KEYWORD(default);
  KEYWORD(hidden);
  KEYWORD(protected);
  KEYWORD(unnamed_addr);
  KEYWORD(local_unnamed_addr);
  KEYWORD(externally_initialized);
  KEYWORD(extern_weak);
  KEYWORD(external);
  KEYWORD(thread_local);
  KEYWORD(localdynamic);
  KEYWORD(initialexec);
  KEYWORD(localexec);
  KEYWORD(zeroinitializer);
  KEYWORD(undef);
  KEYWORD(null);
  KEYWORD(none);
  KEYWORD(poison);
  KEYWORD(to);
  KEYWORD(caller);
  KEYWORD(within);
  KEYWORD(from);
  KEYWORD(tail);
  KEYWORD(musttail);
  KEYWORD(notail);
  KEYWORD(target);
  KEYWORD(triple);
  KEYWORD(source_filename);
  KEYWORD(unwind);
  KEYWORD(datalayout);
  KEYWORD(volatile);
  KEYWORD(atomic);
  KEYWORD(unordered);
  KEYWORD(monotonic);
  KEYWORD(acquire);
  KEYWORD(release);
  KEYWORD(acq_rel);
  KEYWORD(seq_cst);
  KEYWORD(syncscope);

  KEYWORD(nnan);
  KEYWORD(ninf);
  KEYWORD(nsz);
  KEYWORD(arcp);
  KEYWORD(contract);
  KEYWORD(reassoc);
  KEYWORD(afn);
  KEYWORD(fast);
  KEYWORD(nuw);
  KEYWORD(nsw);
  KEYWORD(nusw);
  KEYWORD(exact);
  KEYWORD(disjoint);
  KEYWORD(inbounds);
  KEYWORD(nneg);
  KEYWORD(samesign);
  KEYWORD(inrange);
  KEYWORD(addrspace);
  KEYWORD(section);
  KEYWORD(partition);
  KEYWORD(code_model);
  KEYWORD(alias);
  KEYWORD(ifunc);
  KEYWORD(module);
  KEYWORD(asm);
  KEYWORD(sideeffect);
  KEYWORD(inteldialect);
  KEYWORD(gc);
  KEYWORD(prefix);
  KEYWORD(prologue);
  KEYWORD(prefalign);

  KEYWORD(no_sanitize_address);
  KEYWORD(no_sanitize_hwaddress);
  KEYWORD(sanitize_address_dyninit);

  KEYWORD(ccc);
  KEYWORD(fastcc);
  KEYWORD(coldcc);
  KEYWORD(cfguard_checkcc);
  KEYWORD(x86_stdcallcc);
  KEYWORD(x86_fastcallcc);
  KEYWORD(x86_thiscallcc);
  KEYWORD(x86_vectorcallcc);
  KEYWORD(arm_apcscc);
  KEYWORD(arm_aapcscc);
  KEYWORD(arm_aapcs_vfpcc);
  KEYWORD(aarch64_vector_pcs);
  KEYWORD(aarch64_sve_vector_pcs);
  KEYWORD(aarch64_sme_preservemost_from_x0);
  KEYWORD(aarch64_sme_preservemost_from_x1);
  KEYWORD(aarch64_sme_preservemost_from_x2);
  KEYWORD(msp430_intrcc);
  KEYWORD(avr_intrcc);
  KEYWORD(avr_signalcc);
  KEYWORD(ptx_kernel);
  KEYWORD(ptx_device);
  KEYWORD(spir_kernel);
  KEYWORD(spir_func);
  KEYWORD(intel_ocl_bicc);
  KEYWORD(x86_64_sysvcc);
  KEYWORD(win64cc);
  KEYWORD(x86_regcallcc);
  KEYWORD(swiftcc);
  KEYWORD(swifttailcc);
  KEYWORD(anyregcc);
  KEYWORD(preserve_mostcc);
  KEYWORD(preserve_allcc);
  KEYWORD(preserve_nonecc);
  KEYWORD(ghccc);
  KEYWORD(x86_intrcc);
  KEYWORD(hhvmcc);
  KEYWORD(hhvm_ccc);
  KEYWORD(cxx_fast_tlscc);
  KEYWORD(amdgpu_vs);
  KEYWORD(amdgpu_ls);
  KEYWORD(amdgpu_hs);
  KEYWORD(amdgpu_es);
  KEYWORD(amdgpu_gs);
  KEYWORD(amdgpu_ps);
  KEYWORD(amdgpu_cs);
  KEYWORD(amdgpu_cs_chain);
  KEYWORD(amdgpu_cs_chain_preserve);
  KEYWORD(amdgpu_kernel);
  KEYWORD(amdgpu_gfx);
  KEYWORD(amdgpu_gfx_whole_wave);
  KEYWORD(tailcc);
  KEYWORD(m68k_rtdcc);
  KEYWORD(graalcc);
  KEYWORD(riscv_vector_cc);
  KEYWORD(riscv_vls_cc);
  KEYWORD(cheriot_compartmentcallcc);
  KEYWORD(cheriot_compartmentcalleecc);
  KEYWORD(cheriot_librarycallcc);

  KEYWORD(cc);
  KEYWORD(c);

  KEYWORD(attributes);
  KEYWORD(sync);
  KEYWORD(async);

  // One KEYWORD check per attribute name listed in Attributes.inc.
#define GET_ATTR_NAMES
#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
  KEYWORD(DISPLAY_NAME);
#include "llvm/IR/Attributes.inc"

  KEYWORD(read);
  KEYWORD(write);
  KEYWORD(readwrite);
  KEYWORD(argmem);
  KEYWORD(target_mem0);
  KEYWORD(target_mem1);
  KEYWORD(target_mem);
  KEYWORD(inaccessiblemem);
  KEYWORD(errnomem);
  KEYWORD(argmemonly);
  KEYWORD(inaccessiblememonly);
  KEYWORD(inaccessiblemem_or_argmemonly);
  KEYWORD(nocapture);
  KEYWORD(address_is_null);
  KEYWORD(address);
  KEYWORD(provenance);
  KEYWORD(read_provenance);

  // denormal_fpenv attribute
  KEYWORD(ieee);
  KEYWORD(preservesign);
  KEYWORD(positivezero);
  KEYWORD(dynamic);

  // nofpclass attribute
  KEYWORD(all);
  KEYWORD(nan);
  KEYWORD(snan);
  KEYWORD(qnan);
  KEYWORD(inf);
  // ninf already a keyword
  KEYWORD(pinf);
  KEYWORD(norm);
  KEYWORD(nnorm);
  KEYWORD(pnorm);
  // sub already a keyword
  KEYWORD(nsub);
  KEYWORD(psub);
  KEYWORD(zero);
  KEYWORD(nzero);
  KEYWORD(pzero);

  KEYWORD(type);
  KEYWORD(opaque);

  KEYWORD(comdat);

  // Comdat types
  KEYWORD(any);
  KEYWORD(exactmatch);
  KEYWORD(largest);
  KEYWORD(nodeduplicate);
  KEYWORD(samesize);

  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);

  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
  KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
  KEYWORD(fmaximum);
  KEYWORD(fminimum);
  KEYWORD(fmaximumnum);
  KEYWORD(fminimumnum);
  KEYWORD(uinc_wrap);
  KEYWORD(udec_wrap);
  KEYWORD(usub_cond);
  KEYWORD(usub_sat);

  KEYWORD(splat);
  KEYWORD(vscale);
  KEYWORD(x);
  KEYWORD(blockaddress);
  KEYWORD(dso_local_equivalent);
  KEYWORD(no_cfi);
  KEYWORD(ptrauth);

  // Metadata types.
  KEYWORD(distinct);

  // Use-list order directives.
  KEYWORD(uselistorder);
  KEYWORD(uselistorder_bb);

  KEYWORD(personality);
  KEYWORD(cleanup);
  KEYWORD(catch);
  KEYWORD(filter);

  // Summary index keywords.
  KEYWORD(path);
  KEYWORD(hash);
  KEYWORD(gv);
  KEYWORD(guid);
  KEYWORD(name);
  KEYWORD(summaries);
  KEYWORD(flags);
  KEYWORD(blockcount);
  KEYWORD(linkage);
  KEYWORD(visibility);
  KEYWORD(notEligibleToImport);
  KEYWORD(live);
  KEYWORD(dsoLocal);
  KEYWORD(canAutoHide);
  KEYWORD(importType);
  KEYWORD(definition);
  KEYWORD(declaration);
  KEYWORD(noRenameOnPromotion);
  KEYWORD(function);
  KEYWORD(insts);
  KEYWORD(funcFlags);
  KEYWORD(readNone);
  KEYWORD(readOnly);
  KEYWORD(noRecurse);
  KEYWORD(returnDoesNotAlias);
  KEYWORD(noInline);
  KEYWORD(alwaysInline);
  KEYWORD(noUnwind);
  KEYWORD(mayThrow);
  KEYWORD(hasUnknownCall);
  KEYWORD(mustBeUnreachable);
  KEYWORD(calls);
  KEYWORD(callee);
  KEYWORD(params);
  KEYWORD(param);
  KEYWORD(hotness);
  KEYWORD(unknown);
  KEYWORD(critical);
  // Deprecated, keep in order to support old files.
  KEYWORD(relbf);
  KEYWORD(variable);
  KEYWORD(vTableFuncs);
  KEYWORD(virtFunc);
  KEYWORD(aliasee);
  KEYWORD(refs);
  KEYWORD(typeIdInfo);
  KEYWORD(typeTests);
  KEYWORD(typeTestAssumeVCalls);
  KEYWORD(typeCheckedLoadVCalls);
  KEYWORD(typeTestAssumeConstVCalls);
  KEYWORD(typeCheckedLoadConstVCalls);
  KEYWORD(vFuncId);
  KEYWORD(offset);
  KEYWORD(args);
  KEYWORD(typeid);
  KEYWORD(typeidCompatibleVTable);
  KEYWORD(summary);
  KEYWORD(typeTestRes);
  KEYWORD(kind);
  KEYWORD(unsat);
  KEYWORD(byteArray);
  KEYWORD(inline);
  KEYWORD(single);
  KEYWORD(allOnes);
  KEYWORD(sizeM1BitWidth);
  KEYWORD(alignLog2);
  KEYWORD(sizeM1);
  KEYWORD(bitMask);
  KEYWORD(inlineBits);
  KEYWORD(vcall_visibility);
  KEYWORD(wpdResolutions);
  KEYWORD(wpdRes);
  KEYWORD(indir);
  KEYWORD(singleImpl);
  KEYWORD(branchFunnel);
  KEYWORD(singleImplName);
  KEYWORD(resByArg);
  KEYWORD(byArg);
  KEYWORD(uniformRetVal);
  KEYWORD(uniqueRetVal);
  KEYWORD(virtualConstProp);
  KEYWORD(info);
  KEYWORD(byte);
  KEYWORD(bit);
  KEYWORD(varFlags);
  KEYWORD(callsites);
  KEYWORD(clones);
  KEYWORD(stackIds);
  KEYWORD(allocs);
  KEYWORD(versions);
  KEYWORD(memProf);
  KEYWORD(notcold);

#undef KEYWORD

  // Keywords for types.
#define TYPEKEYWORD(STR, LLVMTY)                                               \
  do {                                                                         \
    if (Keyword == STR) {                                                      \
      TyVal = LLVMTY;                                                          \
      return lltok::Type;                                                      \
    }                                                                          \
  } while (false)

  TYPEKEYWORD("void",      Type::getVoidTy(Context));
  TYPEKEYWORD("half",      Type::getHalfTy(Context));
  TYPEKEYWORD("bfloat",    Type::getBFloatTy(Context));
  TYPEKEYWORD("float",     Type::getFloatTy(Context));
  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
  TYPEKEYWORD("label",     Type::getLabelTy(Context));
  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
  TYPEKEYWORD("x86_amx",   Type::getX86_AMXTy(Context));
  TYPEKEYWORD("token",     Type::getTokenTy(Context));
  TYPEKEYWORD("ptr",       PointerType::getUnqual(Context));

#undef TYPEKEYWORD

  // Keywords for instructions. The opcode is additionally stored in UIntVal.
#define INSTKEYWORD(STR, Enum)                                                 \
  do {                                                                         \
    if (Keyword == #STR) {                                                     \
      UIntVal = Instruction::Enum;                                             \
      return lltok::kw_##STR;                                                  \
    }                                                                          \
  } while (false)

  INSTKEYWORD(fneg,  FNeg);

  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);

  INSTKEYWORD(phi,         PHI);
  INSTKEYWORD(call,        Call);
  INSTKEYWORD(trunc,       Trunc);
  INSTKEYWORD(zext,        ZExt);
  INSTKEYWORD(sext,        SExt);
  INSTKEYWORD(fptrunc,     FPTrunc);
  INSTKEYWORD(fpext,       FPExt);
  INSTKEYWORD(uitofp,      UIToFP);
  INSTKEYWORD(sitofp,      SIToFP);
  INSTKEYWORD(fptoui,      FPToUI);
  INSTKEYWORD(fptosi,      FPToSI);
  INSTKEYWORD(inttoptr,    IntToPtr);
  INSTKEYWORD(ptrtoaddr,   PtrToAddr);
  INSTKEYWORD(ptrtoint,    PtrToInt);
  INSTKEYWORD(bitcast,     BitCast);
  INSTKEYWORD(addrspacecast, AddrSpaceCast);
  INSTKEYWORD(select,      Select);
  INSTKEYWORD(va_arg,      VAArg);
  INSTKEYWORD(ret,         Ret);
  INSTKEYWORD(switch,      Switch);
  INSTKEYWORD(indirectbr,  IndirectBr);
  INSTKEYWORD(invoke,      Invoke);
  INSTKEYWORD(resume,      Resume);
  INSTKEYWORD(unreachable, Unreachable);
  INSTKEYWORD(callbr,      CallBr);

  INSTKEYWORD(alloca,      Alloca);
  INSTKEYWORD(load,        Load);
  INSTKEYWORD(store,       Store);
  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
  INSTKEYWORD(atomicrmw,   AtomicRMW);
  INSTKEYWORD(fence,       Fence);
  INSTKEYWORD(getelementptr, GetElementPtr);

  INSTKEYWORD(extractelement, ExtractElement);
  INSTKEYWORD(insertelement,  InsertElement);
  INSTKEYWORD(shufflevector,  ShuffleVector);
  INSTKEYWORD(extractvalue,   ExtractValue);
  INSTKEYWORD(insertvalue,    InsertValue);
  INSTKEYWORD(landingpad,     LandingPad);
  INSTKEYWORD(cleanupret,     CleanupRet);
  INSTKEYWORD(catchret,       CatchRet);
  INSTKEYWORD(catchswitch,    CatchSwitch);
  INSTKEYWORD(catchpad,       CatchPad);
  INSTKEYWORD(cleanuppad,     CleanupPad);

  INSTKEYWORD(freeze,         Freeze);

#undef INSTKEYWORD

  // DWARF names are matched by prefix only ("DW_<TYPE>_"); the full spelling
  // is stored in StrVal.
#define DWKEYWORD(TYPE, TOKEN)                                                 \
  do {                                                                         \
    if (Keyword.starts_with("DW_" #TYPE "_")) {                                \
      StrVal.assign(Keyword.begin(), Keyword.end());                           \
      return lltok::TOKEN;                                                     \
    }                                                                          \
  } while (false)

  DWKEYWORD(TAG, DwarfTag);
  DWKEYWORD(ATE, DwarfAttEncoding);
  DWKEYWORD(VIRTUALITY, DwarfVirtuality);
  DWKEYWORD(LANG, DwarfLang);
  DWKEYWORD(LNAME, DwarfSourceLangName);
  DWKEYWORD(CC, DwarfCC);
  DWKEYWORD(OP, DwarfOp);
  DWKEYWORD(MACINFO, DwarfMacinfo);
  DWKEYWORD(APPLE_ENUM_KIND, DwarfEnumKind);

#undef DWKEYWORD

  // Keywords for debug record types. StrVal receives the name without the
  // "dbg_" prefix.
#define DBGRECORDTYPEKEYWORD(STR)                                              \
  do {                                                                         \
    if (Keyword == "dbg_" #STR) {                                              \
      StrVal = #STR;                                                           \
      return lltok::DbgRecordType;                                             \
    }                                                                          \
  } while (false)

  DBGRECORDTYPEKEYWORD(value);
  DBGRECORDTYPEKEYWORD(declare);
  DBGRECORDTYPEKEYWORD(assign);
  DBGRECORDTYPEKEYWORD(label);
  DBGRECORDTYPEKEYWORD(declare_value);
#undef DBGRECORDTYPEKEYWORD

  if (Keyword.starts_with("DIFlag")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::DIFlag;
  }

  if (Keyword.starts_with("DISPFlag")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::DISPFlag;
  }

  if (Keyword.starts_with("CSK_")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::ChecksumKind;
  }

  if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
      Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::EmissionKind;
  }

  if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" ||
      Keyword == "Default") {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::NameTableKind;
  }

  if (Keyword == "Binary" || Keyword == "Decimal" || Keyword == "Rational") {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::FixedPointKind;
  }

  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
  // the CFE to avoid forcing it to deal with 64-bit numbers. Also check for
  // f0x[0-9A-Fa-f]+, which is the floating-point hexadecimal literal constant.
  if ((TokStart[0] == 'u' || TokStart[0] == 's' || TokStart[0] == 'f') &&
      TokStart[1] == '0' && TokStart[2] == 'x' &&
      isxdigit(static_cast<unsigned char>(TokStart[3]))) {
    bool IsFloatConst = TokStart[0] == 'f';
    size_t Len = CurPtr - TokStart - 3;
    uint32_t Bits = Len * 4;
    StringRef HexStr(TokStart + 3, Len);
    // Go through unsigned char, as everywhere else in this file: calling
    // isxdigit with a negative plain char is undefined.
    const auto IsHexDigit = [](char C) {
      return isxdigit(static_cast<unsigned char>(C)) != 0;
    };
    if (!all_of(HexStr, IsHexDigit)) {
      // Bad token, return it as an error.
      CurPtr = TokStart + 3;
      return lltok::Error;
    }
    APInt Tmp(Bits, HexStr, 16);
    uint32_t ActiveBits = Tmp.getActiveBits();
    // Integer constants are shrunk to their active bits; floating-point
    // constants keep the width implied by the number of hex digits.
    if (!IsFloatConst && ActiveBits > 0 && ActiveBits < Bits)
      Tmp = Tmp.trunc(ActiveBits);
    APSIntVal = APSInt(Tmp, TokStart[0] != 's');
    return IsFloatConst ? lltok::FloatHexLiteral : lltok::APSInt;
  }

  // If this is "cc1234", return this as just "cc".
  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
    CurPtr = TokStart + 2;
    return lltok::kw_cc;
  }

  // Finally, if this isn't known, return an error.
  CurPtr = TokStart + 1;
  return lltok::Error;
}
|
|
|
|
/// Lex all tokens that start with a 0x prefix, knowing they match and are not
|
|
/// labels.
|
|
/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
|
|
/// HexFPConstant 0x[0-9A-Fa-f]+
|
|
/// HexFP80Constant 0xK[0-9A-Fa-f]+
|
|
/// HexFP128Constant 0xL[0-9A-Fa-f]+
|
|
/// HexPPC128Constant 0xM[0-9A-Fa-f]+
|
|
/// HexHalfConstant 0xH[0-9A-Fa-f]+
|
|
/// HexBFloatConstant 0xR[0-9A-Fa-f]+
|
|
lltok::Kind LLLexer::Lex0x() {
|
|
CurPtr = TokStart + 2;
|
|
|
|
char Kind;
|
|
if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
|
|
CurPtr[0] == 'R') {
|
|
Kind = *CurPtr++;
|
|
} else {
|
|
Kind = 'J';
|
|
}
|
|
|
|
if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
|
|
// Bad token, return it as an error.
|
|
CurPtr = TokStart+1;
|
|
return lltok::Error;
|
|
}
|
|
|
|
while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
|
|
++CurPtr;
|
|
|
|
if (*CurPtr == '.') {
|
|
// HexFPLiteral, following C's %a syntax
|
|
return LexFloatStr();
|
|
}
|
|
|
|
if (Kind == 'J') {
|
|
// HexFPConstant - Floating point constant represented in IEEE format as a
|
|
// hexadecimal number for when exponential notation is not precise enough.
|
|
// Half, BFloat, Float, and double only.
|
|
APFloatVal = APFloat(APFloat::IEEEdouble(),
|
|
APInt(64, HexIntToVal(TokStart + 2, CurPtr)));
|
|
return lltok::APFloat;
|
|
}
|
|
|
|
uint64_t Pair[2];
|
|
switch (Kind) {
|
|
default:
|
|
llvm_unreachable("Unknown kind!");
|
|
case 'K':
|
|
// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
|
|
FP80HexToIntPair(TokStart + 3, CurPtr, Pair);
|
|
APSIntVal = APInt(80, Pair);
|
|
return lltok::FloatHexLiteral;
|
|
case 'L':
|
|
// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
|
|
HexToIntPair(TokStart + 3, CurPtr, Pair);
|
|
APSIntVal = APInt(128, Pair);
|
|
return lltok::FloatHexLiteral;
|
|
case 'M':
|
|
// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
|
|
HexToIntPair(TokStart + 3, CurPtr, Pair);
|
|
APSIntVal = APInt(128, Pair);
|
|
return lltok::FloatHexLiteral;
|
|
case 'H': {
|
|
uint64_t Val = HexIntToVal(TokStart + 3, CurPtr);
|
|
if (!llvm::isUInt<16>(Val)) {
|
|
LexError("hexadecimal constant too large for half (16-bit)");
|
|
return lltok::Error;
|
|
}
|
|
APSIntVal = APInt(16, Val);
|
|
return lltok::FloatHexLiteral;
|
|
}
|
|
case 'R': {
|
|
// Brain floating point
|
|
uint64_t Val = HexIntToVal(TokStart + 3, CurPtr);
|
|
if (!llvm::isUInt<16>(Val)) {
|
|
LexError("hexadecimal constant too large for bfloat (16-bit)");
|
|
return lltok::Error;
|
|
}
|
|
APSIntVal = APInt(16, Val);
|
|
return lltok::FloatHexLiteral;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Lex tokens for a label or a numeric constant, possibly starting with -.
|
|
/// Label [-a-zA-Z$._0-9]+:
|
|
/// NInteger -[0-9]+
|
|
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
|
|
/// PInteger [0-9]+
|
|
/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
|
|
/// HexFPConstant 0x[0-9A-Fa-f]+
|
|
/// HexFP80Constant 0xK[0-9A-Fa-f]+
|
|
/// HexFP128Constant 0xL[0-9A-Fa-f]+
|
|
/// HexPPC128Constant 0xM[0-9A-Fa-f]+
|
|
lltok::Kind LLLexer::LexDigitOrNegative() {
|
|
// If the letter after the negative is not a number, this is probably a label.
|
|
if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
|
|
!isdigit(static_cast<unsigned char>(CurPtr[0]))) {
|
|
// Okay, this is not a number after the -, it's probably a label.
|
|
if (const char *End = isLabelTail(CurPtr)) {
|
|
StrVal.assign(TokStart, End-1);
|
|
CurPtr = End;
|
|
return lltok::LabelStr;
|
|
}
|
|
|
|
// It might be a -inf, -nan, etc. Check if it's a float string (which will
|
|
// also handle error conditions there).
|
|
return LexFloatStr();
|
|
}
|
|
|
|
// At this point, it is either a label, int or fp constant.
|
|
|
|
// Skip digits, we have at least one.
|
|
for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
|
|
/*empty*/;
|
|
|
|
// Check if this is a fully-numeric label:
|
|
if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
|
|
uint64_t Val = atoull(TokStart, CurPtr);
|
|
++CurPtr; // Skip the colon.
|
|
if ((unsigned)Val != Val)
|
|
LexError("invalid value number (too large)");
|
|
UIntVal = unsigned(Val);
|
|
return lltok::LabelID;
|
|
}
|
|
|
|
// Check to see if this really is a string label, e.g. "-1:".
|
|
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
|
|
if (const char *End = isLabelTail(CurPtr)) {
|
|
StrVal.assign(TokStart, End-1);
|
|
CurPtr = End;
|
|
return lltok::LabelStr;
|
|
}
|
|
}
|
|
|
|
// If the next character is a '.', then it is a fp value, otherwise its
|
|
// integer.
|
|
if (CurPtr[0] != '.') {
|
|
if (TokStart[0] == '0' && TokStart[1] == 'x')
|
|
return Lex0x();
|
|
if (TokStart[0] == '-' && TokStart[1] == '0' && TokStart[2] == 'x')
|
|
return LexFloatStr();
|
|
|
|
APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
|
|
return lltok::APSInt;
|
|
}
|
|
|
|
++CurPtr;
|
|
|
|
// Skip over [0-9]*([eE][-+]?[0-9]+)?
|
|
while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
|
|
|
|
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
|
|
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
|
|
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
|
|
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
|
|
CurPtr += 2;
|
|
while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
|
|
}
|
|
}
|
|
|
|
StrVal.assign(TokStart, CurPtr - TokStart);
|
|
return lltok::FloatLiteral;
|
|
}
|
|
|
|
/// Lex a floating point constant starting with +.
|
|
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
|
|
/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
|
|
/// HexFPSpecial [-+](inf|qnan|s?nan\(0x[0-9A-Fa-f]+\))
|
|
lltok::Kind LLLexer::LexPositive() {
|
|
// If it's not numeric, check for special floating-point values.
|
|
if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
|
|
return LexFloatStr();
|
|
|
|
// Skip digits.
|
|
for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
|
|
/*empty*/;
|
|
|
|
// If the first non-digit is an x, check if it's a hex FP literal. LexFloatStr
|
|
// will reanalyze TokStr..CurPtr to make sure that it's 0x and not 413x.
|
|
if (CurPtr[0] == 'x')
|
|
return LexFloatStr();
|
|
|
|
// At this point, we need a '.'.
|
|
if (CurPtr[0] != '.') {
|
|
CurPtr = TokStart + 1;
|
|
return lltok::Error;
|
|
}
|
|
|
|
++CurPtr;
|
|
|
|
// Skip over [0-9]*([eE][-+]?[0-9]+)?
|
|
while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
|
|
|
|
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
|
|
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
|
|
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
|
|
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
|
|
CurPtr += 2;
|
|
while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
|
|
}
|
|
}
|
|
|
|
StrVal.assign(TokStart, CurPtr - TokStart);
|
|
return lltok::FloatLiteral;
|
|
}
|
|
|
|
/// Lex all tokens that start with a + or - that could be a float literal.
|
|
/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
|
|
/// HexFPSpecial [-+](inf|qnan|s?nan\(0x[0-9A-Fa-f]+\))
|
|
lltok::Kind LLLexer::LexFloatStr() {
|
|
// At the point we enter this function, we may have seen a few characters
|
|
// already, but how many differs based on the entry point. Rewind to the
|
|
// beginning just in case.
|
|
CurPtr = TokStart;
|
|
|
|
// Check for optional sign.
|
|
if (*CurPtr == '-' || *CurPtr == '+')
|
|
++CurPtr;
|
|
|
|
if (*CurPtr != '0') {
|
|
// Check for keywords.
|
|
const char *LabelStart = CurPtr;
|
|
while (isLabelChar(*CurPtr))
|
|
++CurPtr;
|
|
StringRef Label(LabelStart, CurPtr - LabelStart);
|
|
|
|
// Basic special values.
|
|
if (Label == "inf") {
|
|
// Copy from the beginning, to include the sign.
|
|
StrVal.assign(TokStart, CurPtr - TokStart);
|
|
return lltok::FloatLiteral;
|
|
}
|
|
|
|
// APFloat::convertFromString doesn't support qnan, so translate it to a
|
|
// nan payload string it does support.
|
|
if (Label == "qnan") {
|
|
StrVal = *TokStart == '-' ? "-nan(0)" : "nan(0)";
|
|
return lltok::FloatLiteral;
|
|
}
|
|
|
|
// NaN with payload.
|
|
if ((Label == "nan" || Label == "snan") && *CurPtr == '(') {
|
|
const char *Payload = ++CurPtr;
|
|
while (*CurPtr && *CurPtr != ')')
|
|
++CurPtr;
|
|
|
|
// If no close parenthesis, it's a bad token, return it as an error.
|
|
if (*CurPtr++ != ')') {
|
|
CurPtr = TokStart + 1;
|
|
LexError("unclosed nan literal");
|
|
return lltok::Error;
|
|
}
|
|
|
|
StringRef PayloadStr(Payload, CurPtr - Payload);
|
|
APInt Val;
|
|
if (PayloadStr.consume_front("0x") && PayloadStr.getAsInteger(16, Val)) {
|
|
StrVal.assign(TokStart, CurPtr - TokStart);
|
|
// Drop the leading + from the string, as APFloat::convertFromString
|
|
// doesn't support leading + sign.
|
|
if (StrVal[0] == '+')
|
|
StrVal.erase(0, 1);
|
|
return lltok::FloatLiteral;
|
|
}
|
|
}
|
|
|
|
// Bad token, return it as an error.
|
|
LexError("bad payload format for nan literal");
|
|
CurPtr = TokStart + 1;
|
|
return lltok::Error;
|
|
}
|
|
++CurPtr;
|
|
|
|
if (*CurPtr++ != 'x') {
|
|
// Bad token, return it as an error.
|
|
CurPtr = TokStart + 1;
|
|
return lltok::Error;
|
|
}
|
|
|
|
if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
|
|
// Bad token, return it as an error.
|
|
CurPtr = TokStart + 1;
|
|
return lltok::Error;
|
|
}
|
|
|
|
while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
|
|
++CurPtr;
|
|
|
|
if (*CurPtr != '.') {
|
|
// Bad token, return it as an error.
|
|
CurPtr = TokStart + 1;
|
|
return lltok::Error;
|
|
}
|
|
|
|
++CurPtr; // Eat the .
|
|
while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
|
|
++CurPtr;
|
|
|
|
if (*CurPtr != 'p' && *CurPtr != 'P') {
|
|
// Bad token, return it as an error.
|
|
CurPtr = TokStart + 1;
|
|
return lltok::Error;
|
|
}
|
|
|
|
++CurPtr;
|
|
if (*CurPtr == '+' || *CurPtr == '-')
|
|
++CurPtr;
|
|
while (isdigit(static_cast<unsigned char>(CurPtr[0])))
|
|
++CurPtr;
|
|
|
|
StrVal.assign(TokStart, CurPtr - TokStart);
|
|
return lltok::FloatLiteral;
|
|
}
|