[lldb] Add tree-sitter based Rust syntax highlighting (#181282)
This adds tree-sitter based Rust syntax highlighting to LLDB. It consists of the RustTreeSitterHighlighter plugin and the vendored Rust grammar [1], which is licensed under MIT. [1] https://github.com/tree-sitter/tree-sitter-rust
This commit is contained in:
committed by
GitHub
parent
78ff5b55fd
commit
b453adff7a
@@ -41,4 +41,5 @@ function(add_tree_sitter_grammar name source_dir binary_dir)
|
||||
)
|
||||
endfunction()
|
||||
|
||||
add_subdirectory(Rust)
|
||||
add_subdirectory(Swift)
|
||||
|
||||
@@ -14,3 +14,4 @@ Each plugin contains a vendored copy of the corresponding grammar in the
|
||||
## Supported Languages
|
||||
|
||||
- Swift based on [swift-tree-sitter](https://github.com/tree-sitter/swift-tree-sitter) 0.9.0
|
||||
- Rust based on [tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) 0.24.0
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
# Build the Rust tree-sitter highlighter plugin and link it against the
# vendored tree-sitter-rust grammar built below.
add_lldb_library(lldbPluginHighlighterTreeSitterRust PLUGIN
  RustTreeSitterHighlighter.cpp

  LINK_COMPONENTS
    Support
  LINK_LIBS
    lldbCore
    lldbUtility
    lldbTreeSitter
    tree-sitter-rust
  )

# Compile the vendored grammar (parser + external scanner) into the
# tree-sitter-rust library and generate the highlight-query header from the
# sources in this directory (see add_tree_sitter_grammar in the parent
# CMakeLists).
add_tree_sitter_grammar(tree-sitter-rust
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_BINARY_DIR}
)
|
||||
@@ -0,0 +1,17 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_HIGHLIGHTQUERY_H
#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_HIGHLIGHTQUERY_H

#include "llvm/ADT/StringRef.h"

// Tree-sitter highlight query for Rust. @HIGHLIGHT_QUERY@ is a placeholder
// that is presumably replaced with the contents of queries/highlights.scm
// when this header is configured by add_tree_sitter_grammar — confirm in the
// grammar CMake helper. The unusual raw-string delimiter ("__") keeps query
// text containing )" from terminating the literal early.
static constexpr llvm::StringLiteral highlight_query =
    R"__(@HIGHLIGHT_QUERY@)__";

#endif // LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_HIGHLIGHTQUERY_H
|
||||
@@ -0,0 +1,43 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "RustTreeSitterHighlighter.h"
#include "HighlightQuery.h"
#include "lldb/Target/Language.h"

LLDB_PLUGIN_DEFINE_ADV(RustTreeSitterHighlighter, HighlighterTreeSitterRust)

// Entry point of the vendored tree-sitter-rust grammar (C linkage; defined
// in the generated parser).
extern "C" {
const TSLanguage *tree_sitter_rust();
}

using namespace lldb_private;

/// Returns the tree-sitter grammar used to parse Rust source code.
const TSLanguage *RustTreeSitterHighlighter::GetLanguage() const {
  return tree_sitter_rust();
}

/// Returns the highlight query (embedded at build time, see HighlightQuery.h)
/// that maps syntax nodes to highlight captures.
llvm::StringRef RustTreeSitterHighlighter::GetHighlightQuery() const {
  return highlight_query;
}

/// PluginManager factory callback. Only answers for Rust; returning nullptr
/// lets other highlighter plugins claim the remaining languages.
Highlighter *
RustTreeSitterHighlighter::CreateInstance(lldb::LanguageType language) {
  if (language == lldb::eLanguageTypeRust)
    return new RustTreeSitterHighlighter();
  return nullptr;
}

void RustTreeSitterHighlighter::Initialize() {
  // Pass a real description instead of repeating the plugin name, which the
  // original did (RegisterPlugin takes name, description, create-callback).
  PluginManager::RegisterPlugin(
      GetPluginNameStatic(),
      "Rust syntax highlighting based on tree-sitter", CreateInstance);
}

void RustTreeSitterHighlighter::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
|
||||
@@ -0,0 +1,41 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Fix: the include guard was copied from Plugins/Language/TreeSitterCommon
// and did not match this file's location; use the same naming scheme as the
// sibling HighlightQuery.h header.
#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_RUSTTREESITTERHIGHLIGHTER_H
#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_RUSTTREESITTERHIGHLIGHTER_H

#include "../TreeSitterHighlighter.h"
#include "llvm/ADT/StringRef.h"

namespace lldb_private {

/// Syntax highlighter for Rust backed by the vendored tree-sitter-rust
/// grammar. The TreeSitterHighlighter base class drives parsing and styling;
/// this subclass only supplies the grammar and the highlight query.
class RustTreeSitterHighlighter : public TreeSitterHighlighter {
public:
  RustTreeSitterHighlighter() = default;
  ~RustTreeSitterHighlighter() override = default;

  llvm::StringRef GetName() const override { return "tree-sitter-rust"; }

  /// PluginManager factory; returns an instance only for eLanguageTypeRust.
  static Highlighter *CreateInstance(lldb::LanguageType language);

  static void Terminate();
  static void Initialize();

  static llvm::StringRef GetPluginNameStatic() {
    return "Tree-sitter Rust Highlighter";
  }
  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }

protected:
  /// Returns the tree-sitter grammar for Rust.
  const TSLanguage *GetLanguage() const override;
  /// Returns the highlight query embedded from queries/highlights.scm.
  llvm::StringRef GetHighlightQuery() const override;
};

} // namespace lldb_private

#endif // LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_RUST_RUSTTREESITTERHIGHLIGHTER_H
|
||||
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2017 Maxim Sokolov
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,161 @@
|
||||
; Identifiers
|
||||
|
||||
(type_identifier) @type
|
||||
(primitive_type) @type.builtin
|
||||
(field_identifier) @property
|
||||
|
||||
; Identifier conventions
|
||||
|
||||
; Assume all-caps names are constants
|
||||
((identifier) @constant
 (#match? @constant "^[A-Z][A-Z\\d_]+$"))
|
||||
|
||||
; Assume uppercase names are enum constructors
|
||||
((identifier) @constructor
|
||||
(#match? @constructor "^[A-Z]"))
|
||||
|
||||
; Assume that uppercase names in paths are types
|
||||
((scoped_identifier
|
||||
path: (identifier) @type)
|
||||
(#match? @type "^[A-Z]"))
|
||||
((scoped_identifier
|
||||
path: (scoped_identifier
|
||||
name: (identifier) @type))
|
||||
(#match? @type "^[A-Z]"))
|
||||
((scoped_type_identifier
|
||||
path: (identifier) @type)
|
||||
(#match? @type "^[A-Z]"))
|
||||
((scoped_type_identifier
|
||||
path: (scoped_identifier
|
||||
name: (identifier) @type))
|
||||
(#match? @type "^[A-Z]"))
|
||||
|
||||
; Assume all qualified names in struct patterns are enum constructors. (They're
|
||||
; either that, or struct names; highlighting both as constructors seems to be
|
||||
; the less glaring choice of error, visually.)
|
||||
(struct_pattern
|
||||
type: (scoped_type_identifier
|
||||
name: (type_identifier) @constructor))
|
||||
|
||||
; Function calls
|
||||
|
||||
(call_expression
|
||||
function: (identifier) @function)
|
||||
(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @function.method))
|
||||
(call_expression
|
||||
function: (scoped_identifier
|
||||
"::"
|
||||
name: (identifier) @function))
|
||||
|
||||
(generic_function
|
||||
function: (identifier) @function)
|
||||
(generic_function
|
||||
function: (scoped_identifier
|
||||
name: (identifier) @function))
|
||||
(generic_function
|
||||
function: (field_expression
|
||||
field: (field_identifier) @function.method))
|
||||
|
||||
(macro_invocation
|
||||
macro: (identifier) @function.macro
|
||||
"!" @function.macro)
|
||||
|
||||
; Function definitions
|
||||
|
||||
(function_item (identifier) @function)
|
||||
(function_signature_item (identifier) @function)
|
||||
|
||||
(line_comment) @comment
|
||||
(block_comment) @comment
|
||||
|
||||
(line_comment (doc_comment)) @comment.documentation
|
||||
(block_comment (doc_comment)) @comment.documentation
|
||||
|
||||
"(" @punctuation.bracket
|
||||
")" @punctuation.bracket
|
||||
"[" @punctuation.bracket
|
||||
"]" @punctuation.bracket
|
||||
"{" @punctuation.bracket
|
||||
"}" @punctuation.bracket
|
||||
|
||||
(type_arguments
|
||||
"<" @punctuation.bracket
|
||||
">" @punctuation.bracket)
|
||||
(type_parameters
|
||||
"<" @punctuation.bracket
|
||||
">" @punctuation.bracket)
|
||||
|
||||
"::" @punctuation.delimiter
|
||||
":" @punctuation.delimiter
|
||||
"." @punctuation.delimiter
|
||||
"," @punctuation.delimiter
|
||||
";" @punctuation.delimiter
|
||||
|
||||
(parameter (identifier) @variable.parameter)
|
||||
|
||||
(lifetime (identifier) @label)
|
||||
|
||||
"as" @keyword
|
||||
"async" @keyword
|
||||
"await" @keyword
|
||||
"break" @keyword
|
||||
"const" @keyword
|
||||
"continue" @keyword
|
||||
"default" @keyword
|
||||
"dyn" @keyword
|
||||
"else" @keyword
|
||||
"enum" @keyword
|
||||
"extern" @keyword
|
||||
"fn" @keyword
|
||||
"for" @keyword
|
||||
"gen" @keyword
|
||||
"if" @keyword
|
||||
"impl" @keyword
|
||||
"in" @keyword
|
||||
"let" @keyword
|
||||
"loop" @keyword
|
||||
"macro_rules!" @keyword
|
||||
"match" @keyword
|
||||
"mod" @keyword
|
||||
"move" @keyword
|
||||
"pub" @keyword
|
||||
"raw" @keyword
|
||||
"ref" @keyword
|
||||
"return" @keyword
|
||||
"static" @keyword
|
||||
"struct" @keyword
|
||||
"trait" @keyword
|
||||
"type" @keyword
|
||||
"union" @keyword
|
||||
"unsafe" @keyword
|
||||
"use" @keyword
|
||||
"where" @keyword
|
||||
"while" @keyword
|
||||
"yield" @keyword
|
||||
(crate) @keyword
|
||||
(mutable_specifier) @keyword
|
||||
(use_list (self) @keyword)
|
||||
(scoped_use_list (self) @keyword)
|
||||
(scoped_identifier (self) @keyword)
|
||||
(super) @keyword
|
||||
|
||||
(self) @variable.builtin
|
||||
|
||||
(char_literal) @string
|
||||
(string_literal) @string
|
||||
(raw_string_literal) @string
|
||||
|
||||
(boolean_literal) @constant.builtin
|
||||
(integer_literal) @constant.builtin
|
||||
(float_literal) @constant.builtin
|
||||
|
||||
(escape_sequence) @escape
|
||||
|
||||
(attribute_item) @attribute
|
||||
(inner_attribute_item) @attribute
|
||||
|
||||
"*" @operator
|
||||
"&" @operator
|
||||
"'" @operator
|
||||
@@ -0,0 +1,393 @@
|
||||
#include "tree_sitter/alloc.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
#include <wctype.h>
|
||||
|
||||
enum TokenType {
|
||||
STRING_CONTENT,
|
||||
RAW_STRING_LITERAL_START,
|
||||
RAW_STRING_LITERAL_CONTENT,
|
||||
RAW_STRING_LITERAL_END,
|
||||
FLOAT_LITERAL,
|
||||
BLOCK_OUTER_DOC_MARKER,
|
||||
BLOCK_INNER_DOC_MARKER,
|
||||
BLOCK_COMMENT_CONTENT,
|
||||
LINE_DOC_CONTENT,
|
||||
ERROR_SENTINEL
|
||||
};
|
||||
|
||||
// Persistent external-scanner state: the number of '#' characters that opened
// the raw string literal currently being scanned (e.g. 2 for r##"..."##).
typedef struct {
    uint8_t opening_hash_count;
} Scanner;

// Allocate zero-initialized scanner state; tree-sitter owns the lifetime and
// calls ..._destroy to release it.
void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); }

void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner *)payload); }

// Write the scanner state into tree-sitter's serialization buffer; returns
// the number of bytes written (a single byte holding the hash count).
unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
    Scanner *scanner = (Scanner *)payload;
    buffer[0] = (char)scanner->opening_hash_count;
    return 1;
}
|
||||
|
||||
void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
scanner->opening_hash_count = 0;
|
||||
if (length == 1) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
scanner->opening_hash_count = buffer[0];
|
||||
}
|
||||
}
|
||||
|
||||
// True for characters allowed in a numeric literal's digit run ('_' is a
// digit separator in Rust).
static inline bool is_num_char(int32_t c) { return c == '_' || iswdigit(c); }

// Consume the lookahead character and include it in the current token.
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }

// Consume the lookahead character WITHOUT including it in the current token.
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
|
||||
|
||||
// Scan a STRING_CONTENT token: consume everything up to (but not including)
// the next '"' or '\' — the closing quote and escape sequences are handled by
// the grammar itself. Returns false at EOF (unterminated string) or when no
// characters were consumed.
static inline bool process_string(TSLexer *lexer) {
    bool has_content = false;
    for (;;) {
        if (lexer->lookahead == '\"' || lexer->lookahead == '\\') {
            break;
        }
        if (lexer->eof(lexer)) {
            return false;
        }
        has_content = true;
        advance(lexer);
    }
    lexer->result_symbol = STRING_CONTENT;
    lexer->mark_end(lexer);
    return has_content;
}
|
||||
|
||||
// Scan the opening delimiter of a raw string literal: an optional 'b' (byte
// string) or 'c' (C string) prefix, then 'r', zero or more '#', and the
// opening '"'. The hash count is remembered in the scanner state so the
// matching closing delimiter can be recognized later.
static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) {
    if (lexer->lookahead == 'b' || lexer->lookahead == 'c') {
        advance(lexer);
    }
    if (lexer->lookahead != 'r') {
        return false;
    }
    advance(lexer);

    uint8_t opening_hash_count = 0;
    while (lexer->lookahead == '#') {
        advance(lexer);
        opening_hash_count++;
    }

    if (lexer->lookahead != '"') {
        return false;
    }
    advance(lexer);
    // Persist the count across scanner invocations (survives serialization).
    scanner->opening_hash_count = opening_hash_count;

    lexer->result_symbol = RAW_STRING_LITERAL_START;
    return true;
}
|
||||
|
||||
// Scan the body of a raw string literal up to — but not including — the
// closing '"' followed by the same number of '#' as the opening delimiter.
// mark_end is called before probing the candidate terminator so that a
// partial match (e.g. '"#' inside r##"..."##) remains part of the content.
static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
    for (;;) {
        if (lexer->eof(lexer)) {
            // Unterminated raw string.
            return false;
        }
        if (lexer->lookahead == '"') {
            lexer->mark_end(lexer);
            advance(lexer);
            unsigned hash_count = 0;
            while (lexer->lookahead == '#' && hash_count < scanner->opening_hash_count) {
                advance(lexer);
                hash_count++;
            }
            if (hash_count == scanner->opening_hash_count) {
                lexer->result_symbol = RAW_STRING_LITERAL_CONTENT;
                return true;
            }
        } else {
            advance(lexer);
        }
    }
}
|
||||
|
||||
// Scan the closing delimiter of a raw string literal: the '"' plus the '#'
// run whose length was recorded by scan_raw_string_start. The grammar only
// requests this token where the delimiter is known to be present, so no
// validation is needed here.
static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) {
    advance(lexer); // consume the '"'
    for (unsigned i = 0; i < scanner->opening_hash_count; i++) {
        advance(lexer); // consume one '#' per opening hash
    }
    lexer->result_symbol = RAW_STRING_LITERAL_END;
    return true;
}
|
||||
|
||||
// Scan a FLOAT_LITERAL starting at a digit: integer part, optional fraction,
// optional exponent, optional type suffix (e.g. 3.14f32, 1e3u32). Returns
// false when the token turns out to be an integer or something else entirely
// (method call on an int, range expression), letting the grammar re-lex it.
static inline bool process_float_literal(TSLexer *lexer) {
    lexer->result_symbol = FLOAT_LITERAL;

    advance(lexer);
    while (is_num_char(lexer->lookahead)) {
        advance(lexer);
    }

    bool has_fraction = false, has_exponent = false;

    if (lexer->lookahead == '.') {
        has_fraction = true;
        advance(lexer);
        if (iswalpha(lexer->lookahead)) {
            // The dot is followed by a letter: 1.max(2) => not a float but an integer
            return false;
        }

        if (lexer->lookahead == '.') {
            // Two consecutive dots: a range expression like 1..2, not a float.
            return false;
        }
        while (is_num_char(lexer->lookahead)) {
            advance(lexer);
        }
    }

    // Commit what we have so far; the exponent/suffix below only extends the
    // token if it parses cleanly.
    lexer->mark_end(lexer);

    if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
        has_exponent = true;
        advance(lexer);
        if (lexer->lookahead == '+' || lexer->lookahead == '-') {
            advance(lexer);
        }
        if (!is_num_char(lexer->lookahead)) {
            // 'e' not followed by digits — keep the token ending before it.
            return true;
        }
        advance(lexer);
        while (is_num_char(lexer->lookahead)) {
            advance(lexer);
        }

        lexer->mark_end(lexer);
    }

    if (!has_exponent && !has_fraction) {
        // Just digits: an integer literal, not a float.
        return false;
    }

    if (lexer->lookahead != 'u' && lexer->lookahead != 'i' && lexer->lookahead != 'f') {
        return true;
    }
    advance(lexer);
    if (!iswdigit(lexer->lookahead)) {
        // Suffix letter without a width (e.g. trailing 'f' alone) — keep the
        // token ending before it.
        return true;
    }

    // Consume the suffix width digits (32 in f32, 64 in u64, ...).
    while (iswdigit(lexer->lookahead)) {
        advance(lexer);
    }

    lexer->mark_end(lexer);
    return true;
}
|
||||
|
||||
// Scan the text of a line doc comment (/// or //!) through the end of the
// line or EOF. Always succeeds — an empty doc comment is still a valid token.
static inline bool process_line_doc_content(TSLexer *lexer) {
    lexer->result_symbol = LINE_DOC_CONTENT;
    for (;;) {
        if (lexer->eof(lexer)) {
            return true;
        }
        if (lexer->lookahead == '\n') {
            // Include the newline in the doc content node.
            // Line endings are useful for markdown injection.
            advance(lexer);
            return true;
        }
        advance(lexer);
    }
}
|
||||
|
||||
typedef enum {
|
||||
LeftForwardSlash,
|
||||
LeftAsterisk,
|
||||
Continuing,
|
||||
} BlockCommentState;
|
||||
|
||||
typedef struct {
|
||||
BlockCommentState state;
|
||||
unsigned nestingDepth;
|
||||
} BlockCommentProcessing;
|
||||
|
||||
static inline void process_left_forward_slash(BlockCommentProcessing *processing, char current) {
|
||||
if (current == '*') {
|
||||
processing->nestingDepth += 1;
|
||||
}
|
||||
processing->state = Continuing;
|
||||
};
|
||||
|
||||
// Previous character was '*': a following '/' closes one nesting level. A
// repeated '*' stays in the LeftAsterisk state (mark_end keeps the content
// boundary before it, so a terminating "*/" is not swallowed).
static inline void process_left_asterisk(BlockCommentProcessing *processing, char current, TSLexer *lexer) {
    if (current == '*') {
        lexer->mark_end(lexer);
        processing->state = LeftAsterisk;
        return;
    }

    if (current == '/') {
        processing->nestingDepth -= 1;
    }

    processing->state = Continuing;
}

// Ordinary content character: only '/' or '*' can start a delimiter, so move
// to the corresponding state; anything else keeps us in Continuing.
static inline void process_continuing(BlockCommentProcessing *processing, char current) {
    switch (current) {
        case '/':
            processing->state = LeftForwardSlash;
            break;
        case '*':
            processing->state = LeftAsterisk;
            break;
    }
}
|
||||
|
||||
// Scan inside a block comment, producing one of three tokens depending on
// what the grammar marked valid: an inner doc marker ("!"), an outer doc
// marker ("*", but not "**" or "*/"), or the comment body itself, tracking
// nested /* */ pairs via the small state machine above.
static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) {
    char first = (char)lexer->lookahead;
    // The first character is stored so we can safely advance inside
    // these if blocks. However, because we only store one, we can only
    // safely advance 1 time. Since there's a chance that an advance could
    // happen in one state, we must advance in all states to ensure that
    // the program ends up in a sane state prior to processing the block
    // comment if need be.
    if (valid_symbols[BLOCK_INNER_DOC_MARKER] && first == '!') {
        lexer->result_symbol = BLOCK_INNER_DOC_MARKER;
        advance(lexer);
        return true;
    }
    if (valid_symbols[BLOCK_OUTER_DOC_MARKER] && first == '*') {
        advance(lexer);
        lexer->mark_end(lexer);
        // If the next token is a / that means that it's an empty block comment.
        if (lexer->lookahead == '/') {
            return false;
        }
        // If the next token is a * that means that this isn't a BLOCK_OUTER_DOC_MARKER
        // as BLOCK_OUTER_DOC_MARKER's only have 2 * not 3 or more.
        if (lexer->lookahead != '*') {
            lexer->result_symbol = BLOCK_OUTER_DOC_MARKER;
            return true;
        }
    } else {
        advance(lexer);
    }

    if (valid_symbols[BLOCK_COMMENT_CONTENT]) {
        BlockCommentProcessing processing = {Continuing, 1};
        // Manually set the current state based on the first character
        switch (first) {
            case '*':
                processing.state = LeftAsterisk;
                if (lexer->lookahead == '/') {
                    // This case can happen in an empty doc block comment
                    // like /*!*/. The comment has no contents, so bail.
                    return false;
                }
                break;
            case '/':
                processing.state = LeftForwardSlash;
                break;
            default:
                processing.state = Continuing;
                break;
        }

        // For the purposes of actually parsing rust code, this
        // is incorrect as it considers an unterminated block comment
        // to be an error. However, for the purposes of syntax highlighting
        // this should be considered successful as otherwise you are not able
        // to syntax highlight a block of code prior to closing the
        // block comment
        while (!lexer->eof(lexer) && processing.nestingDepth != 0) {
            // Set first to the current lookahead as that is the second character
            // as we force an advance in the above code when we are checking if we
            // need to handle a block comment inner or outer doc comment signifier
            // node
            first = (char)lexer->lookahead;
            switch (processing.state) {
                case LeftForwardSlash:
                    process_left_forward_slash(&processing, first);
                    break;
                case LeftAsterisk:
                    process_left_asterisk(&processing, first, lexer);
                    break;
                case Continuing:
                    lexer->mark_end(lexer);
                    process_continuing(&processing, first);
                    break;
                default:
                    break;
            }
            advance(lexer);
            // A '/' that did not close the outermost comment is still part of
            // the content; extend the token past it.
            if (first == '/' && processing.nestingDepth != 0) {
                lexer->mark_end(lexer);
            }
        }
        lexer->result_symbol = BLOCK_COMMENT_CONTENT;
        return true;
    }

    return false;
}
|
||||
|
||||
// Main external-scanner entry point: dispatch to the token-specific scanners
// above based on which external tokens the grammar considers valid here.
// Order matters: comment and string content are handled before whitespace is
// skipped, since their content may legitimately begin with whitespace.
bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    // The documentation states that if the lexical analysis fails for some reason
    // they will mark every state as valid and pass it to the external scanner
    // However, we can't do anything to help them recover in that case so we
    // should just fail.
    /*
    link: https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
    If a syntax error is encountered during regular parsing, Tree-sitter’s
    first action during error recovery will be to call the external scanner’s
    scan function with all tokens marked valid. The scanner should detect this
    case and handle it appropriately. One simple method of detection is to add
    an unused token to the end of the externals array, for example

    externals: $ => [$.token1, $.token2, $.error_sentinel],

    then check whether that token is marked valid to determine whether
    Tree-sitter is in error correction mode.
    */
    if (valid_symbols[ERROR_SENTINEL]) {
        return false;
    }

    Scanner *scanner = (Scanner *)payload;

    if (valid_symbols[BLOCK_COMMENT_CONTENT] || valid_symbols[BLOCK_INNER_DOC_MARKER] ||
        valid_symbols[BLOCK_OUTER_DOC_MARKER]) {
        return process_block_comment(lexer, valid_symbols);
    }

    // STRING_CONTENT and FLOAT_LITERAL valid together indicates a context
    // where digits could still form a float; defer to the float path below.
    if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
        return process_string(lexer);
    }

    if (valid_symbols[LINE_DOC_CONTENT]) {
        return process_line_doc_content(lexer);
    }

    // Remaining tokens never start with whitespace, so skip it here.
    while (iswspace(lexer->lookahead)) {
        skip(lexer);
    }

    if (valid_symbols[RAW_STRING_LITERAL_START] &&
        (lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) {
        return scan_raw_string_start(scanner, lexer);
    }

    if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) {
        return scan_raw_string_content(scanner, lexer);
    }

    if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') {
        return scan_raw_string_end(scanner, lexer);
    }

    if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
        return process_float_literal(lexer);
    }

    return false;
}
|
||||
@@ -0,0 +1,53 @@
|
||||
{
|
||||
"grammars": [
|
||||
{
|
||||
"name": "rust",
|
||||
"camelcase": "Rust",
|
||||
"scope": "source.rust",
|
||||
"path": ".",
|
||||
"file-types": [
|
||||
"rs"
|
||||
],
|
||||
"highlights": [
|
||||
"queries/highlights.scm"
|
||||
],
|
||||
"injections": [
|
||||
"queries/injections.scm"
|
||||
],
|
||||
"tags": [
|
||||
"queries/tags.scm"
|
||||
],
|
||||
"injection-regex": "rust"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"version": "0.24.0",
|
||||
"license": "MIT",
|
||||
"description": "Rust grammar for tree-sitter",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Maxim Sokolov",
|
||||
"email": "maxim0xff@gmail.com"
|
||||
},
|
||||
{
|
||||
"name": "Max Brunsfeld",
|
||||
"email": "maxbrunsfeld@gmail.com"
|
||||
},
|
||||
{
|
||||
"name": "Amaan Qureshi",
|
||||
"email": "amaanq12@gmail.com"
|
||||
}
|
||||
],
|
||||
"links": {
|
||||
"repository": "https://github.com/tree-sitter/tree-sitter-rust"
|
||||
}
|
||||
},
|
||||
"bindings": {
|
||||
"c": true,
|
||||
"go": true,
|
||||
"node": true,
|
||||
"python": true,
|
||||
"rust": true,
|
||||
"swift": true
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,8 @@
|
||||
if (LLDB_ENABLE_TREESITTER)
|
||||
set(SWIFT_HIGHLIGHTER_PLUGIN lldbPluginHighlighterTreeSitterSwift)
|
||||
set(TREESITTER_HIGHLIGHTER_PLUGINS
|
||||
lldbPluginHighlighterTreeSitterRust
|
||||
lldbPluginHighlighterTreeSitterSwift
|
||||
)
|
||||
endif()
|
||||
|
||||
add_lldb_unittest(HighlighterTests
|
||||
@@ -11,5 +14,5 @@ add_lldb_unittest(HighlighterTests
|
||||
lldbPluginCPlusPlusLanguage
|
||||
lldbPluginObjCLanguage
|
||||
lldbPluginObjCPlusPlusLanguage
|
||||
${SWIFT_HIGHLIGHTER_PLUGIN}
|
||||
${TREESITTER_HIGHLIGHTER_PLUGINS}
|
||||
)
|
||||
|
||||
@@ -14,9 +14,11 @@
|
||||
#include "Plugins/Language/ObjC/ObjCLanguage.h"
|
||||
#include "Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h"
|
||||
#include "lldb/Core/Highlighter.h"
|
||||
#include "lldb/Host/Config.h"
|
||||
#include "lldb/Host/FileSystem.h"
|
||||
|
||||
#if LLDB_ENABLE_TREESITTER
|
||||
#include "Plugins/Highlighter/TreeSitter/Rust/RustTreeSitterHighlighter.h"
|
||||
#include "Plugins/Highlighter/TreeSitter/Swift/SwiftTreeSitterHighlighter.h"
|
||||
#endif
|
||||
|
||||
@@ -31,7 +33,7 @@ class HighlighterTest : public testing::Test {
|
||||
// filename.
|
||||
SubsystemRAII<FileSystem, ClangHighlighter,
|
||||
#if LLDB_ENABLE_TREESITTER
|
||||
SwiftTreeSitterHighlighter,
|
||||
SwiftTreeSitterHighlighter, RustTreeSitterHighlighter,
|
||||
#endif
|
||||
DefaultHighlighter, CPlusPlusLanguage, ObjCLanguage,
|
||||
ObjCPlusPlusLanguage>
|
||||
@@ -59,6 +61,7 @@ TEST_F(HighlighterTest, HighlighterSelectionType) {
|
||||
|
||||
#if LLDB_ENABLE_TREESITTER
|
||||
EXPECT_EQ(getName(lldb::eLanguageTypeSwift), "tree-sitter-swift");
|
||||
EXPECT_EQ(getName(lldb::eLanguageTypeRust), "tree-sitter-rust");
|
||||
#endif
|
||||
|
||||
EXPECT_EQ(getName(lldb::eLanguageTypeUnknown), "none");
|
||||
@@ -440,4 +443,126 @@ TEST_F(HighlighterTest, SwiftClosures) {
|
||||
EXPECT_EQ(" <k>let</k> closure = { (x: <k>Int</k>) in return x * 2 }",
|
||||
highlightSwift(" let closure = { (x: Int) in return x * 2 }", s));
|
||||
}
|
||||
|
||||
/// Highlights \p code as Rust (via a "main.rs" filename hint) using \p style,
/// optionally emitting the "selected" style around the character at
/// \p cursor.
static std::string
highlightRust(llvm::StringRef code, HighlightStyle style,
              std::optional<size_t> cursor = std::optional<size_t>()) {
  HighlighterManager mgr;
  const Highlighter &h =
      mgr.getHighlighterFor(lldb::eLanguageTypeRust, "main.rs");
  return h.Highlight(style, code, cursor);
}
|
||||
|
||||
// Line, block, nested-block, and doc comments all map to the comment style.
TEST_F(HighlighterTest, RustComments) {
  HighlightStyle s;
  s.comment.Set("<cc>", "</cc>");

  EXPECT_EQ(" <cc>// I'm feeling lucky today</cc>",
            highlightRust(" // I'm feeling lucky today", s));
  EXPECT_EQ(" <cc>/* This is a\nmultiline comment */</cc>",
            highlightRust(" /* This is a\nmultiline comment */", s));
  // Rust block comments nest (unlike C); the scanner tracks the depth.
  EXPECT_EQ(" <cc>/* nested /* comment */ works */</cc>",
            highlightRust(" /* nested /* comment */ works */", s));
  EXPECT_EQ(" <cc>/// Documentation comment</cc>",
            highlightRust(" /// Documentation comment", s));
  EXPECT_EQ(" <cc>//! Inner doc comment</cc>",
            highlightRust(" //! Inner doc comment", s));
}

// Keyword captures from the highlight query. Note: per these expectations,
// type names following struct/enum/impl/trait are also rendered with the
// keyword style.
TEST_F(HighlighterTest, RustKeywords) {
  HighlightStyle s;
  s.keyword.Set("<k>", "</k>");

  EXPECT_EQ(" <k>let</k> x = 5;", highlightRust(" let x = 5;", s));
  EXPECT_EQ(" <k>let</k> <k>mut</k> y = 10;",
            highlightRust(" let mut y = 10;", s));
  EXPECT_EQ(" <k>fn</k> foo() { <k>return</k> 42; }",
            highlightRust(" fn foo() { return 42; }", s));
  EXPECT_EQ(" <k>struct</k> <k>Point</k> {}",
            highlightRust(" struct Point {}", s));
  EXPECT_EQ(" <k>enum</k> <k>Color</k> {}", highlightRust(" enum Color {}", s));
  EXPECT_EQ(" <k>impl</k> <k>MyStruct</k> {}",
            highlightRust(" impl MyStruct {}", s));
  EXPECT_EQ(" <k>trait</k> <k>MyTrait</k> {}",
            highlightRust(" trait MyTrait {}", s));
  EXPECT_EQ(" <k>if</k> x { }", highlightRust(" if x { }", s));
  EXPECT_EQ(" <k>for</k> i <k>in</k> 0..10 { }",
            highlightRust(" for i in 0..10 { }", s));
  EXPECT_EQ(" <k>while</k> x { }", highlightRust(" while x { }", s));
  EXPECT_EQ(" <k>match</k> x { _ => {} }",
            highlightRust(" match x { _ => {} }", s));
  EXPECT_EQ(" <k>pub</k> <k>fn</k> foo() {}",
            highlightRust(" pub fn foo() {}", s));
  EXPECT_EQ(" <k>const</k> MAX: u32 = 100;",
            highlightRust(" const MAX: u32 = 100;", s));
  EXPECT_EQ(" <k>static</k> GLOBAL: i32 = 0;",
            highlightRust(" static GLOBAL: i32 = 0;", s));
  // A cursor is passed, but s.selected is unset, so no selection markers
  // appear; this exercises highlighting of an incomplete (unclosed) line.
  EXPECT_EQ(" <k>if</k> <k>let</k> Some(foo) = foo_maybe {",
            highlightRust(" if let Some(foo) = foo_maybe {", s, 0));
}
|
||||
|
||||
// Plain, raw (r"..."), hashed-raw (r#"..."#), and byte (b"...") string
// literals, including the external scanner's raw-string delimiter matching.
TEST_F(HighlighterTest, RustStringLiterals) {
  HighlightStyle s;
  s.string_literal.Set("<str>", "</str>");

  EXPECT_EQ(" let s = <str>\"Hello, World!\"</str>;",
            highlightRust(" let s = \"Hello, World!\";", s));
  EXPECT_EQ(" let raw = <str>r\"C:\\\\path\"</str>;",
            highlightRust(" let raw = r\"C:\\\\path\";", s));
  EXPECT_EQ(" let raw2 = <str>r#\"He said \"hi\"\"#</str>;",
            highlightRust(" let raw2 = r#\"He said \"hi\"\"#;", s));
  EXPECT_EQ(" let byte_str = <str>b\"bytes\"</str>;",
            highlightRust(" let byte_str = b\"bytes\";", s));
}

// Per these expectations, numeric and char literals are left unstyled even
// with scalar_literal set.
TEST_F(HighlighterTest, RustScalarLiterals) {
  HighlightStyle s;
  s.scalar_literal.Set("<scalar>", "</scalar>");

  EXPECT_EQ(" let i = 42;", highlightRust(" let i = 42;", s));
  EXPECT_EQ(" let hex = 0xFF;", highlightRust(" let hex = 0xFF;", s));
  EXPECT_EQ(" let bin = 0b1010;", highlightRust(" let bin = 0b1010;", s));
  EXPECT_EQ(" let oct = 0o77;", highlightRust(" let oct = 0o77;", s));
  EXPECT_EQ(" let f = 3.14;", highlightRust(" let f = 3.14;", s));
  EXPECT_EQ(" let typed = 42u32;", highlightRust(" let typed = 42u32;", s));
  EXPECT_EQ(" let c = 'x';", highlightRust(" let c = 'x';", s));
}

// The identifier style applies to called/defined function names, not to
// plain variables, types, or lifetimes.
TEST_F(HighlighterTest, RustIdentifiers) {
  HighlightStyle s;
  s.identifier.Set("<id>", "</id>");

  EXPECT_EQ(" let foo = <id>bar</id>();",
            highlightRust(" let foo = bar();", s));
  EXPECT_EQ(" my_variable = 10;", highlightRust(" my_variable = 10;", s));
  EXPECT_EQ(" let x: i32 = 5", highlightRust(" let x: i32 = 5", s));
  EXPECT_EQ(" fn <id>foo</id>() -> String { }",
            highlightRust(" fn foo() -> String { }", s));
  EXPECT_EQ(" fn <id>foo</id><'a>(x: &'a str) {}",
            highlightRust(" fn foo<'a>(x: &'a str) {}", s));
  EXPECT_EQ(" struct Foo<'a> { x: &'a i32 }",
            highlightRust(" struct Foo<'a> { x: &'a i32 }", s));
}
|
||||
|
||||
// Only '*', '&', and '\'' carry the @operator capture in the highlight
// query, so other operators stay unstyled here.
TEST_F(HighlighterTest, RustOperators) {
  HighlightStyle s;
  s.operators.Set("[", "]");

  EXPECT_EQ(" 1+2-3[*]4/5", highlightRust(" 1+2-3*4/5", s));
  EXPECT_EQ(" x && y || z", highlightRust(" x && y || z", s));
  EXPECT_EQ(" a == b != c", highlightRust(" a == b != c", s));
  EXPECT_EQ(" x [&]y", highlightRust(" x &y", s));
  EXPECT_EQ(" [*]ptr", highlightRust(" *ptr", s));
}

// The selected style wraps exactly the single character at the cursor index.
TEST_F(HighlighterTest, RustCursorPosition) {
  HighlightStyle s;
  s.selected.Set("<c>", "</c>");

  EXPECT_EQ("<c> </c>let x = 5;", highlightRust(" let x = 5;", s, 0));
  EXPECT_EQ(" <c>l</c>et x = 5;", highlightRust(" let x = 5;", s, 1));
  EXPECT_EQ(" l<c>e</c>t x = 5;", highlightRust(" let x = 5;", s, 2));
  EXPECT_EQ(" le<c>t</c> x = 5;", highlightRust(" let x = 5;", s, 3));
  EXPECT_EQ(" let<c> </c>x = 5;", highlightRust(" let x = 5;", s, 4));
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user