//===-- DILParser.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // This implements the recursive descent parser for the Data Inspection // Language (DIL), and its helper functions, which will eventually underlie the // 'frame variable' command. The language that this parser recognizes is // described in lldb/docs/dil-expr-lang.ebnf // //===----------------------------------------------------------------------===// #include "lldb/ValueObject/DILParser.h" #include "lldb/Host/common/DiagnosticsRendering.h" #include "lldb/Symbol/CompileUnit.h" #include "lldb/Target/ExecutionContextScope.h" #include "lldb/Target/LanguageRuntime.h" #include "lldb/Target/StackFrame.h" #include "lldb/ValueObject/DILAST.h" #include "lldb/ValueObject/DILEval.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatAdapters.h" #include #include #include #include #include namespace lldb_private::dil { DILDiagnosticError::DILDiagnosticError(llvm::StringRef expr, const std::string &message, uint32_t loc, uint16_t err_len) : ErrorInfo(make_error_code(std::errc::invalid_argument)) { DiagnosticDetail::SourceLocation sloc = { FileSpec{}, /*line=*/1, static_cast(loc + 1), err_len, false, /*in_user_input=*/true}; // If the error is not handled by `RenderDiagnosticDetails`, this creates an // error message that can be displayed instead. // Example: // (lldb) script lldb.frame.GetValueForVariablePath("1 + foo") // error: :1:5: use of undeclared identifier 'foo' // 1 | 1 + foo // | ^~~ auto msg = llvm::formatv(":1:{0}: {1}\n 1 | {2}\n |", loc + 1, message, expr); std::string rendered_str; llvm::raw_string_ostream rendered_os(rendered_str); rendered_os << msg.str(); rendered_os << llvm::indent(loc + 1) << "^"; if (err_len > 1) { // Underline the rest of the erroneous token after the cursor '^'. rendered_os << std::string(err_len - 1, '~'); } m_detail.source_location = sloc; m_detail.severity = lldb::eSeverityError; m_detail.message = message; m_detail.rendered = std::move(rendered_str); } static CompilerType ResolveTypeByName(const std::string &name, ExecutionContextScope &ctx_scope) { // Internally types don't have global scope qualifier in their names and // LLDB doesn't support queries with it too. llvm::StringRef name_ref(name); if (name_ref.starts_with("::")) name_ref = name_ref.drop_front(2); std::vector result_type_list; lldb::TargetSP target_sp = ctx_scope.CalculateTarget(); if (!name_ref.empty() && target_sp) { ModuleList &images = target_sp->GetImages(); TypeQuery query{ConstString(name_ref), TypeQueryOptions::e_exact_match | TypeQueryOptions::e_find_one}; TypeResults results; images.FindTypes(nullptr, query, results); const lldb::TypeSP &type_sp = results.GetFirstType(); if (type_sp) result_type_list.push_back(type_sp->GetFullCompilerType()); } if (!result_type_list.empty()) { CompilerType type = result_type_list[0]; if (type.IsValid() && type.GetTypeName().GetStringRef() == name_ref) return type; } return {}; } llvm::Expected DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr frame_sp, lldb::DynamicValueType use_dynamic, lldb::DILMode mode) { llvm::Error error = llvm::Error::success(); DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, error, mode); ASTNodeUP node_up = parser.Run(); assert(node_up && "ASTNodeUP must not contain a nullptr"); if (error) return error; return node_up; } DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr frame_sp, lldb::DynamicValueType use_dynamic, llvm::Error &error, lldb::DILMode mode) : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr), m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic), m_mode(mode) {} ASTNodeUP DILParser::Run() { ASTNodeUP expr = ParseExpression(); Expect(Token::Kind::eof); return expr; } // Parse an expression. // // expression: // cast_expression // ASTNodeUP DILParser::ParseExpression() { return ParseAdditiveExpression(); } // Parse an additive_expression. // // additive_expression: // multiplicative_expression {"+" multiplicative_expression} // ASTNodeUP DILParser::ParseAdditiveExpression() { auto lhs = ParseMultiplicativeExpression(); assert(lhs && "ASTNodeUP must not contain a nullptr"); while (CurToken().IsOneOf({Token::plus, Token::minus})) { Token token = CurToken(); m_dil_lexer.Advance(); auto rhs = ParseMultiplicativeExpression(); assert(rhs && "ASTNodeUP must not contain a nullptr"); lhs = std::make_unique( token.GetLocation(), GetBinaryOpKindFromToken(token.GetKind()), std::move(lhs), std::move(rhs)); } return lhs; } // Parse a multiplicative_expression. // // multiplicative_expression: // cast_expression {"*" cast_expression} // cast_expression {"/" cast_expression} // cast_expression {"%" cast_expression} // ASTNodeUP DILParser::ParseMultiplicativeExpression() { auto lhs = ParseCastExpression(); while (CurToken().IsOneOf({Token::star, Token::slash, Token::percent})) { Token token = CurToken(); if (token.Is(Token::star) && m_mode != lldb::eDILModeFull) { BailOut("binary multiplication (*) is allowed only in DIL full mode", token.GetLocation(), token.GetSpelling().length()); return std::make_unique(); } m_dil_lexer.Advance(); auto rhs = ParseCastExpression(); assert(rhs && "ASTNodeUP must not contain a nullptr"); lhs = std::make_unique( token.GetLocation(), GetBinaryOpKindFromToken(token.GetKind()), std::move(lhs), std::move(rhs)); } return lhs; } // Parse a cast_expression. // // cast_expression: // unary_expression // "(" type_id ")" cast_expression ASTNodeUP DILParser::ParseCastExpression() { if (!CurToken().Is(Token::l_paren)) return ParseUnaryExpression(); // This could be a type cast, try parsing the contents as a type declaration. Token token = CurToken(); uint32_t loc = token.GetLocation(); // Enable lexer backtracking, so that we can rollback in case it's not // actually a type declaration. // Start tentative parsing (save token location/idx, for possible rollback). uint32_t save_token_idx = m_dil_lexer.GetCurrentTokenIdx(); // Consume the token only after enabling the backtracking. m_dil_lexer.Advance(); // Try parsing the type declaration. If the returned value is not valid, // then we should rollback and try parsing the expression. auto type_id = ParseTypeId(); if (type_id) { // Successfully parsed the type declaration. Commit the backtracked // tokens and parse the cast_expression. if (!type_id.value().IsValid()) return std::make_unique(); Expect(Token::r_paren); m_dil_lexer.Advance(); auto rhs = ParseCastExpression(); assert(rhs && "ASTNodeUP must not contain a nullptr"); return std::make_unique(loc, type_id.value(), std::move(rhs), CastKind::eNone); } // Failed to parse the contents of the parentheses as a type declaration. // Rollback the lexer and try parsing it as unary_expression. TentativeParsingRollback(save_token_idx); return ParseUnaryExpression(); } // Parse an unary_expression. // // unary_expression: // postfix_expression // unary_operator cast_expression // // unary_operator: // "&" // "*" // "+" // "-" // ASTNodeUP DILParser::ParseUnaryExpression() { if (CurToken().IsOneOf( {Token::amp, Token::star, Token::minus, Token::plus})) { Token token = CurToken(); uint32_t loc = token.GetLocation(); m_dil_lexer.Advance(); auto rhs = ParseCastExpression(); assert(rhs && "ASTNodeUP must not contain a nullptr"); switch (token.GetKind()) { case Token::star: return std::make_unique(loc, UnaryOpKind::Deref, std::move(rhs)); case Token::amp: return std::make_unique(loc, UnaryOpKind::AddrOf, std::move(rhs)); case Token::minus: return std::make_unique(loc, UnaryOpKind::Minus, std::move(rhs)); case Token::plus: return std::make_unique(loc, UnaryOpKind::Plus, std::move(rhs)); default: llvm_unreachable("invalid token kind"); } } return ParsePostfixExpression(); } // Parse a postfix_expression. // // postfix_expression: // primary_expression // postfix_expression "[" expression "]" // postfix_expression "[" expression ":" expression "]" // postfix_expression "." id_expression // postfix_expression "->" id_expression // ASTNodeUP DILParser::ParsePostfixExpression() { ASTNodeUP lhs = ParsePrimaryExpression(); assert(lhs && "ASTNodeUP must not contain a nullptr"); while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) { uint32_t loc = CurToken().GetLocation(); Token token = CurToken(); switch (token.GetKind()) { case Token::l_square: { m_dil_lexer.Advance(); ASTNodeUP index = ParseExpression(); assert(index && "ASTNodeUP must not contain a nullptr"); if (CurToken().GetKind() == Token::colon) { m_dil_lexer.Advance(); ASTNodeUP last_index = ParseExpression(); assert(last_index && "ASTNodeUP must not contain a nullptr"); lhs = std::make_unique( loc, std::move(lhs), std::move(index), std::move(last_index)); } else if (CurToken().GetKind() == Token::minus) { BailOut("use of '-' for bitfield range is deprecated; use ':' instead", CurToken().GetLocation(), CurToken().GetSpelling().length()); return std::make_unique(); } else { lhs = std::make_unique(loc, std::move(lhs), std::move(index)); } Expect(Token::r_square); m_dil_lexer.Advance(); break; } case Token::period: case Token::arrow: { m_dil_lexer.Advance(); Token member_token = CurToken(); std::string member_id = ParseIdExpression(); lhs = std::make_unique( member_token.GetLocation(), std::move(lhs), token.GetKind() == Token::arrow, member_id); break; } default: llvm_unreachable("invalid token"); } } return lhs; } // Parse a primary_expression. // // primary_expression: // numeric_literal // boolean_literal // id_expression // "(" expression ")" // ASTNodeUP DILParser::ParsePrimaryExpression() { if (CurToken().IsOneOf({Token::integer_constant, Token::float_constant})) return ParseNumericLiteral(); if (CurToken().IsOneOf({Token::kw_true, Token::kw_false})) return ParseBooleanLiteral(); if (CurToken().IsOneOf( {Token::coloncolon, Token::identifier, Token::l_paren})) { // Save the source location for the diagnostics message. uint32_t loc = CurToken().GetLocation(); std::string identifier = ParseIdExpression(); if (!identifier.empty()) return std::make_unique(loc, identifier); } if (CurToken().Is(Token::l_paren)) { m_dil_lexer.Advance(); auto expr = ParseExpression(); Expect(Token::r_paren); m_dil_lexer.Advance(); return expr; } BailOut(llvm::formatv("Unexpected token: {0}", CurToken()), CurToken().GetLocation(), CurToken().GetSpelling().length()); return std::make_unique(); } // Parse nested_name_specifier. // // nested_name_specifier: // type_name "::" // namespace_name "::" // nested_name_specifier identifier "::" // std::string DILParser::ParseNestedNameSpecifier() { // The first token in nested_name_specifier is always an identifier, or // '(anonymous namespace)'. switch (CurToken().GetKind()) { case Token::l_paren: { // Anonymous namespaces need to be treated specially: They are // represented the the string '(anonymous namespace)', which has a // space in it (throwing off normal parsing) and is not actually // proper C++> Check to see if we're looking at // '(anonymous namespace)::...' // Look for all the pieces, in order: // l_paren 'anonymous' 'namespace' r_paren coloncolon if (m_dil_lexer.LookAhead(1).Is(Token::identifier) && (m_dil_lexer.LookAhead(1).GetSpelling() == "anonymous") && m_dil_lexer.LookAhead(2).Is(Token::identifier) && (m_dil_lexer.LookAhead(2).GetSpelling() == "namespace") && m_dil_lexer.LookAhead(3).Is(Token::r_paren) && m_dil_lexer.LookAhead(4).Is(Token::coloncolon)) { m_dil_lexer.Advance(4); Expect(Token::coloncolon); m_dil_lexer.Advance(); if (!CurToken().Is(Token::identifier) && !CurToken().Is(Token::l_paren)) { BailOut("Expected an identifier or anonymous namespace, but not found.", CurToken().GetLocation(), CurToken().GetSpelling().length()); } // Continue parsing the nested_namespace_specifier. std::string identifier2 = ParseNestedNameSpecifier(); return "(anonymous namespace)::" + identifier2; } return ""; } // end of special handling for '(anonymous namespace)' case Token::identifier: { // If the next token is scope ("::"), then this is indeed a // nested_name_specifier if (m_dil_lexer.LookAhead(1).Is(Token::coloncolon)) { // This nested_name_specifier is a single identifier. std::string identifier = CurToken().GetSpelling(); m_dil_lexer.Advance(1); Expect(Token::coloncolon); m_dil_lexer.Advance(); // Continue parsing the nested_name_specifier. return identifier + "::" + ParseNestedNameSpecifier(); } return ""; } default: return ""; } } // Parse a type_id. // // type_id: // type_specifier_seq [abstract_declarator] // // type_specifier_seq: // type_specifier [type_specifier] // // type_specifier: // ["::"] [nested_name_specifier] type_name // not handled for now! // builtin_typename // std::optional DILParser::ParseTypeId() { CompilerType type; auto maybe_builtin_type = ParseBuiltinType(); if (maybe_builtin_type) { type = *maybe_builtin_type; } else { // Check to see if we have a user-defined type here. // First build up the user-defined type name. std::string type_name; ParseTypeSpecifierSeq(type_name); if (type_name.empty()) return {}; type = ResolveTypeByName(type_name, *m_ctx_scope); if (!type.IsValid()) return {}; // Same-name identifiers should be preferred over typenames. if (LookupIdentifier(type_name, m_ctx_scope, m_use_dynamic)) // TODO: Make type accessible with 'class', 'struct' and 'union' keywords. return {}; // Same-name identifiers should be preferred over typenames. if (LookupGlobalIdentifier(type_name, m_ctx_scope, m_ctx_scope->CalculateTarget(), m_use_dynamic)) // TODO: Make type accessible with 'class', 'struct' and 'union' keywords return {}; } // // abstract_declarator: // ptr_operator [abstract_declarator] // std::vector ptr_operators; while (CurToken().IsOneOf({Token::star, Token::amp})) { Token tok = CurToken(); ptr_operators.push_back(std::move(tok)); m_dil_lexer.Advance(); } type = ResolveTypeDeclarators(type, ptr_operators); return type; } // Parse a built-in type // // builtin_typename: // identifer_seq // // identifier_seq // identifer [identifier_seq] // // A built-in type can be a single identifier or a space-separated // list of identifiers (e.g. "short" or "long long"). std::optional DILParser::ParseBuiltinType() { std::string type_name = ""; uint32_t save_token_idx = m_dil_lexer.GetCurrentTokenIdx(); bool first_word = true; while (CurToken().GetKind() == Token::identifier) { if (CurToken().GetSpelling() == "const" || CurToken().GetSpelling() == "volatile") continue; if (!first_word) type_name.push_back(' '); else first_word = false; type_name.append(CurToken().GetSpelling()); m_dil_lexer.Advance(); } if (type_name.size() > 0) { lldb::TargetSP target_sp = m_ctx_scope->CalculateTarget(); ConstString const_type_name(type_name.c_str()); for (auto type_system_sp : target_sp->GetScratchTypeSystems()) if (auto compiler_type = type_system_sp->GetBuiltinTypeByName(const_type_name)) return compiler_type; } TentativeParsingRollback(save_token_idx); return {}; } // Parse a type_specifier_seq. // // type_specifier_seq: // type_specifier [type_specifier_seq] // void DILParser::ParseTypeSpecifierSeq(std::string &type_name) { while (true) { std::optional err_or_string = ParseTypeSpecifier(); if (!err_or_string) break; type_name = *err_or_string; } } // Parse a type_specifier. // // type_specifier: // ["::"] [nested_name_specifier] type_name // // Returns TRUE if a type_specifier was successfully parsed at this location. // std::optional DILParser::ParseTypeSpecifier() { // The type_specifier must be a user-defined type. Try parsing a // simple_type_specifier. // Try parsing optional global scope operator. bool global_scope = false; if (CurToken().Is(Token::coloncolon)) { global_scope = true; m_dil_lexer.Advance(); } // Try parsing optional nested_name_specifier. auto nested_name_specifier = ParseNestedNameSpecifier(); // Try parsing required type_name. auto type_name_or_err = ParseTypeName(); if (!type_name_or_err) return type_name_or_err; std::string type_name = *type_name_or_err; // If there is a type_name, then this is indeed a simple_type_specifier. // Global and qualified (namespace/class) scopes can be empty, since they're // optional. In this case type_name is type we're looking for. if (!type_name.empty()) // User-defined typenames can't be combined with builtin keywords. return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "", nested_name_specifier, type_name); // No type_specifier was found here. return {}; } // Parse a type_name. // // type_name: // class_name // enum_name // typedef_name // // class_name // identifier // // enum_name // identifier // // typedef_name // identifier // std::optional DILParser::ParseTypeName() { // Typename always starts with an identifier. if (CurToken().IsNot(Token::identifier)) { return std::nullopt; } // Otherwise look for a class_name, enum_name or a typedef_name. std::string identifier = CurToken().GetSpelling(); m_dil_lexer.Advance(); return identifier; } // Parse an id_expression. // // id_expression: // unqualified_id // qualified_id // // qualified_id: // ["::"] [nested_name_specifier] unqualified_id // ["::"] identifier // // identifier: // ? Token::identifier ? // std::string DILParser::ParseIdExpression() { // Try parsing optional global scope operator. bool global_scope = false; if (CurToken().Is(Token::coloncolon)) { global_scope = true; m_dil_lexer.Advance(); } // Try parsing optional nested_name_specifier. std::string nested_name_specifier = ParseNestedNameSpecifier(); // If nested_name_specifier is present, then it's qualified_id production. // Follow the first production rule. if (!nested_name_specifier.empty()) { // Parse unqualified_id and construct a fully qualified id expression. auto unqualified_id = ParseUnqualifiedId(); return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "", nested_name_specifier, unqualified_id); } if (!CurToken().Is(Token::identifier)) return ""; // No nested_name_specifier, but with global scope -- this is also a // qualified_id production. Follow the second production rule. if (global_scope) { Expect(Token::identifier); std::string identifier = CurToken().GetSpelling(); m_dil_lexer.Advance(); return llvm::formatv("{0}{1}", global_scope ? "::" : "", identifier); } // This is unqualified_id production. return ParseUnqualifiedId(); } // Parse an unqualified_id. // // unqualified_id: // identifier // // identifier: // ? Token::identifier ? // std::string DILParser::ParseUnqualifiedId() { Expect(Token::identifier); std::string identifier = CurToken().GetSpelling(); m_dil_lexer.Advance(); return identifier; } CompilerType DILParser::ResolveTypeDeclarators(CompilerType type, const std::vector &ptr_operators) { // Resolve pointers/references. for (Token tk : ptr_operators) { uint32_t loc = tk.GetLocation(); if (tk.GetKind() == Token::star) { // Pointers to reference types are forbidden. if (type.IsReferenceType()) { BailOut(llvm::formatv("'type name' declared as a pointer to a " "reference of type {0}", type.TypeDescription()), loc, CurToken().GetSpelling().length()); return {}; } // Get pointer type for the base type: e.g. int* -> int**. type = type.GetPointerType(); } else if (tk.GetKind() == Token::amp) { // References to references are forbidden. // FIXME: In future we may want to allow rvalue references (i.e. &&). if (type.IsReferenceType()) { BailOut("type name declared as a reference to a reference", loc, CurToken().GetSpelling().length()); return {}; } // Get reference type for the base type: e.g. int -> int&. type = type.GetLValueReferenceType(); } } return type; } // Parse an boolean_literal. // // boolean_literal: // "true" // "false" // ASTNodeUP DILParser::ParseBooleanLiteral() { ExpectOneOf(std::vector{Token::kw_true, Token::kw_false}); uint32_t loc = CurToken().GetLocation(); bool literal_value = CurToken().Is(Token::kw_true); m_dil_lexer.Advance(); return std::make_unique(loc, literal_value); } void DILParser::BailOut(const std::string &error, uint32_t loc, uint16_t err_len) { if (m_error) // If error is already set, then the parser is in the "bail-out" mode. Don't // do anything and keep the original error. return; m_error = llvm::make_error(m_input_expr, error, loc, err_len); // Advance the lexer token index to the end of the lexed tokens vector. m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1); } // Parse a numeric_literal. // // numeric_literal: // ? Token::integer_constant ? // ? Token::floating_constant ? // ASTNodeUP DILParser::ParseNumericLiteral() { ASTNodeUP numeric_constant; if (CurToken().Is(Token::integer_constant)) numeric_constant = ParseIntegerLiteral(); else numeric_constant = ParseFloatingPointLiteral(); if (numeric_constant->GetKind() == NodeKind::eErrorNode) { BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}", CurToken()), CurToken().GetLocation(), CurToken().GetSpelling().length()); return numeric_constant; } m_dil_lexer.Advance(); return numeric_constant; } ASTNodeUP DILParser::ParseIntegerLiteral() { Token token = CurToken(); auto spelling = token.GetSpelling(); llvm::StringRef spelling_ref = spelling; auto radix = llvm::getAutoSenseRadix(spelling_ref); IntegerTypeSuffix type = IntegerTypeSuffix::None; bool is_unsigned = false; if (spelling_ref.consume_back_insensitive("u")) is_unsigned = true; if (spelling_ref.consume_back_insensitive("ll")) type = IntegerTypeSuffix::LongLong; else if (spelling_ref.consume_back_insensitive("l")) type = IntegerTypeSuffix::Long; // Suffix 'u' can be only specified only once, before or after 'l' if (!is_unsigned && spelling_ref.consume_back_insensitive("u")) is_unsigned = true; llvm::APInt raw_value; if (!spelling_ref.getAsInteger(radix, raw_value)) return std::make_unique(token.GetLocation(), raw_value, radix, is_unsigned, type); return std::make_unique(); } ASTNodeUP DILParser::ParseFloatingPointLiteral() { Token token = CurToken(); auto spelling = token.GetSpelling(); llvm::StringRef spelling_ref = spelling; llvm::APFloat raw_float(llvm::APFloat::IEEEdouble()); if (spelling_ref.consume_back_insensitive("f")) raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle()); auto StatusOrErr = raw_float.convertFromString( spelling_ref, llvm::APFloat::rmNearestTiesToEven); if (!errorToBool(StatusOrErr.takeError())) return std::make_unique(token.GetLocation(), raw_float); return std::make_unique(); } void DILParser::Expect(Token::Kind kind) { if (CurToken().IsNot(kind)) { BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()), CurToken().GetLocation(), CurToken().GetSpelling().length()); } } void DILParser::ExpectOneOf(std::vector kinds_vec) { if (!CurToken().IsOneOf(kinds_vec)) { BailOut(llvm::formatv("expected any of ({0}), got: {1}", llvm::iterator_range(kinds_vec), CurToken()), CurToken().GetLocation(), CurToken().GetSpelling().length()); } } } // namespace lldb_private::dil