llvm-project/clang-tools-extra/clang-tidy/add_new_check.py

#!/usr/bin/env python3
#
# ===-----------------------------------------------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

import argparse
import itertools
import os
import re
import sys
import textwrap
from operator import methodcaller
from typing import Optional, Tuple, Match


# Adapts the module's CMakeLists.txt file. Returns 'True' if it could add a new
# entry and 'False' if the entry already existed.
def adapt_cmake(module_path: str, check_name_camel: str) -> bool:
    """Register ``<check_name_camel>.cpp`` in the module's CMakeLists.txt.

    The new entry is placed before the first ``.cpp`` line that compares
    greater than it, or otherwise before the first non-``.cpp`` line that
    follows the source list.

    Returns:
        False, leaving the file untouched, when some line already consists
        of exactly the source name; True once the file has been rewritten
        (even if no insertion point was found).
    """
    cmake_path = os.path.join(module_path, "CMakeLists.txt")

    # The documentation files are encoded using UTF-8, however on Windows the
    # default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
    # always used, use `open(filename, mode, encoding='utf8')` for reading and
    # writing files here and elsewhere.
    with open(cmake_path, "r", encoding="utf8") as cmake:
        lines = cmake.readlines()

    cpp_file = f"{check_name_camel}.cpp"

    # Nothing to do if the source is already listed.
    if any(entry.strip() == cpp_file for entry in lines):
        return False

    # Locate where the new source belongs.
    insert_at = None
    in_source_list = False
    for index, raw_line in enumerate(lines):
        entry = raw_line.strip()
        is_source = entry.endswith(".cpp")
        if not (is_source or in_source_list):
            continue
        in_source_list = True
        if not is_source or entry > cpp_file:
            insert_at = index
            break

    if insert_at is not None:
        lines.insert(insert_at, f"  {cpp_file}\n")

    print(f"Updating {cmake_path}...")
    with open(cmake_path, "w", encoding="utf8", newline="\n") as cmake:
        cmake.writelines(lines)

    return True


# Adds a header for the new check.
def write_header(
    module_path: str,
    module: str,
    namespace: str,
    check_name: str,
    check_name_camel: str,
    description: str,
    lang_restrict: str,
) -> None:
    """Create ``<check_name_camel>.h`` inside *module_path*.

    Args:
        module_path: Directory the header is written to.
        module: Module name; used in the include guard and the docs URL.
        namespace: C++ namespace nested under ``clang::tidy``.
        check_name: Dashed check name; used in the docs URL.
        check_name_camel: Class name, also the file's base name.
        description: Text wrapped at 80 columns into a ``///`` comment.
        lang_restrict: ``%``-template with a ``%(lang)s`` placeholder used as
            the body of ``isLanguageVersionSupported``; when empty, the
            override is omitted.
    """
    doc_comment = "\n".join(
        textwrap.wrap(
            description, width=80, initial_indent="/// ", subsequent_indent="/// "
        )
    )

    # Optional override, appended directly after the `check` declaration.
    override_supported = ""
    if lang_restrict:
        condition = lang_restrict % {"lang": "LangOpts"}
        override_supported = (
            "\n"
            "  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {\n"
            f"    return {condition};\n"
            "  }"
        )

    header_guard = (
        f"LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_{module.upper()}_"
        f"{check_name_camel.upper()}_H"
    )
    fields = {
        "header_guard": header_guard,
        "check_name_camel": check_name_camel,
        "check_name": check_name,
        "module": module,
        "namespace": namespace,
        "description": doc_comment,
        "override_supported": override_supported,
    }
    template = """\
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef %(header_guard)s
#define %(header_guard)s

#include "../ClangTidyCheck.h"

namespace clang::tidy::%(namespace)s {

%(description)s
///
/// For the user-facing documentation see:
/// https://clang.llvm.org/extra/clang-tidy/checks/%(module)s/%(check_name)s.html
class %(check_name_camel)s : public ClangTidyCheck {
public:
  %(check_name_camel)s(StringRef Name, ClangTidyContext *Context)
      : ClangTidyCheck(Name, Context) {}
  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;%(override_supported)s
};

} // namespace clang::tidy::%(namespace)s

#endif // %(header_guard)s
"""

    filename = f"{os.path.join(module_path, check_name_camel)}.h"
    print(f"Creating {filename}...")
    with open(filename, "w", encoding="utf8", newline="\n") as header:
        header.write(template % fields)


# Adds the implementation of the new check.
def write_implementation(
    module_path: str, module: str, namespace: str, check_name_camel: str
) -> None:
    """Create the skeleton ``<check_name_camel>.cpp`` inside *module_path*.

    Args:
        module_path: Directory the source file is written to.
        module: Module name (not used by the template).
        namespace: C++ namespace nested under ``clang::tidy``.
        check_name_camel: Class name, also the file's base name.
    """
    template = """\
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "%(check_name)s.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"

using namespace clang::ast_matchers;

namespace clang::tidy::%(namespace)s {

void %(check_name)s::registerMatchers(MatchFinder *Finder) {
  // FIXME: Add matchers.
  Finder->addMatcher(functionDecl().bind("x"), this);
}

void %(check_name)s::check(const MatchFinder::MatchResult &Result) {
  // FIXME: Add callback implementation.
  const auto *MatchedDecl = Result.Nodes.getNodeAs<FunctionDecl>("x");
  if (!MatchedDecl->getIdentifier() || MatchedDecl->getName().starts_with("awesome_"))
    return;
  diag(MatchedDecl->getLocation(), "function %%0 is insufficiently awesome")
      << MatchedDecl
      << FixItHint::CreateInsertion(MatchedDecl->getLocation(), "awesome_");
  diag(MatchedDecl->getLocation(), "insert 'awesome'", DiagnosticIDs::Note);
}

} // namespace clang::tidy::%(namespace)s
"""
    # `%%0` in the template becomes the literal diagnostic placeholder `%0`.
    fields = {"check_name": check_name_camel, "namespace": namespace}

    filename = f"{os.path.join(module_path, check_name_camel)}.cpp"
    print(f"Creating {filename}...")
    with open(filename, "w", encoding="utf8", newline="\n") as source:
        source.write(template % fields)


# Returns the source filename that implements the module.
def get_module_filename(module_path: str, module: str) -> str:
    """Return the path of the ``<module>TidyModule.cpp`` file in *module_path*.

    The file name is matched case-insensitively against
    ``<module>tidymodule.cpp``; the first matching directory entry wins.

    Raises:
        FileNotFoundError: If *module_path* contains no such file (or, via
            ``os.listdir``, if the directory itself does not exist).
    """
    expected = f"{module.lower()}tidymodule.cpp"
    for entry in os.listdir(module_path):
        if entry.lower() == expected:
            return os.path.join(module_path, entry)
    # Report the actual problem instead of an opaque IndexError.
    raise FileNotFoundError(
        f"no '{module}TidyModule.cpp' (case-insensitive) found in '{module_path}'"
    )


# Modifies the module to include the new check.
def adapt_module(
    module_path: str, module: str, check_name: str, check_name_camel: str
) -> None:
    """Add the new check's include and registration to the module source.

    The ``#include`` is inserted before the first quoted include whose path
    compares greater than *check_name_camel*, or right after the include
    block. The ``registerCheck`` call is inserted before the first registered
    check whose name compares greater than ``<module>-<check_name>``, or
    before the first line consisting solely of ``}``.

    The new contents are assembled in memory and written only once the whole
    file has been processed, so a malformed input never leaves the module
    source truncated.

    Raises:
        ValueError: If a ``registerCheck<...>(`` call has no quoted check
            name on the same or the following line.
    """
    filename = get_module_filename(module_path, module)
    with open(filename, "r", encoding="utf8") as f:
        lines = f.readlines()

    check_fq_name = f"{module}-{check_name}"
    include_line = f'#include "{check_name_camel}.h"\n'
    check_decl = (
        f"    CheckFactories.registerCheck<{check_name_camel}>(\n"
        f'        "{check_fq_name}");\n'
    )

    output = []
    header_added = False
    header_found = False
    check_added = False

    lines_iter = iter(lines)
    for line in lines_iter:
        if not header_added:
            if match := re.search('#include "(.*)"', line):
                header_found = True
                if match.group(1) > check_name_camel:
                    header_added = True
                    output.append(include_line)
            elif header_found:
                # First line after the include block.
                header_added = True
                output.append(include_line)

        if not check_added:
            if line.strip() == "}":
                check_added = True
                output.append(check_decl)
            elif match := re.search(
                r'registerCheck<(.*)> *\( *(?:"([^"]*)")?', line
            ):
                prev_line = None
                current_check_name = match.group(2)
                if current_check_name is None:
                    # If we didn't find the check name on this line, look on
                    # the next one.
                    prev_line = line
                    line = next(lines_iter, None)
                    if line is None:
                        raise ValueError(
                            f"{filename}: file ends inside a registerCheck call"
                        )
                    if match := re.search(' *"([^"]*)"', line):
                        current_check_name = match.group(1)
                if not current_check_name:
                    raise ValueError(
                        f"{filename}: cannot find the check name of a "
                        "registerCheck call"
                    )
                if current_check_name > check_fq_name:
                    check_added = True
                    output.append(check_decl)
                if prev_line:
                    output.append(prev_line)
        output.append(line)

    print(f"Updating {filename}...")
    with open(filename, "w", encoding="utf8", newline="\n") as f:
        f.writelines(output)


# Adds a release notes entry.
def add_release_notes(
    module_path: str, module: str, check_name: str, description: str
) -> None:
    """Insert a "New check" entry into ``docs/ReleaseNotes.rst``.

    The release notes are located at ``../../docs/ReleaseNotes.rst`` relative
    to *module_path*. Once the "New checks" heading has been seen, the entry
    is written before the first existing ``- New :doc:`` entry whose name
    compares greater than ``<module>-<check_name>``, or before the
    "New check aliases" heading. If no such position is found the file is
    rewritten unchanged.
    """
    check_name_dashes = f"{module}-{check_name}"
    indented_desc = "\n".join(
        textwrap.wrap(
            description, width=80, initial_indent="  ", subsequent_indent="  "
        )
    )
    note = (
        f"- New :doc:`{check_name_dashes}\n"
        f"  <clang-tidy/checks/{module}/{check_name}>` check.\n"
        "\n"
        f"{indented_desc}\n"
        "\n"
    )

    filename = os.path.normpath(
        os.path.join(module_path, "../../docs/ReleaseNotes.rst")
    )
    with open(filename, "r", encoding="utf8") as notes:
        lines = notes.readlines()

    section_header = re.compile("New checks")
    next_section_header = re.compile("New check aliases")
    existing_entry = re.compile("- New :doc:`(.*)")

    print(f"Updating {filename}...")
    with open(filename, "w", encoding="utf8", newline="\n") as notes:
        inserted = False
        in_section = False
        position_reached = False

        for line in lines:
            if inserted:
                notes.write(line)
                continue

            entry = existing_entry.match(line)
            sorts_after_new = bool(entry) and entry.group(1) > check_name_dashes
            if sorts_after_new or next_section_header.match(line):
                position_reached = True

            if section_header.match(line):
                in_section = True
            elif (
                not line.startswith("^^^^") and in_section and position_reached
            ):
                # Heading underlines are never a valid insertion point.
                notes.write(note)
                inserted = True

            notes.write(line)


# Adds a test for the check.
def write_test(
    module_path: str,
    module: str,
    check_name: str,
    test_extension: str,
    test_standard: Optional[str],
) -> None:
    """Create a skeleton lit test for the new check.

    The file is written to
    ``../../test/clang-tidy/checkers/<module>/<check_name>.<test_extension>``
    relative to *module_path*. When *test_standard* is given, the RUN line
    passes ``-std=<test_standard>-or-later``.
    """
    std_flag = f"-std={test_standard}-or-later " if test_standard else ""
    fields = {
        "check_name_dashes": f"{module}-{check_name}",
        "standard": std_flag,
    }
    # `%%` yields the literal `%` that lit substitutions such as `%s` need.
    template = """\
// RUN: %%check_clang_tidy %(standard)s%%s %(check_name_dashes)s %%t

// FIXME: Add something that triggers the check here.
void f();
// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'f' is insufficiently awesome [%(check_name_dashes)s]

// FIXME: Verify the applied fix.
//   * Make the CHECK patterns specific enough and try to make verified lines
//     unique to avoid incorrect matches.
//   * Use {{}} for regular expressions.
// CHECK-FIXES: {{^}}void awesome_f();{{$}}

// FIXME: Add something that doesn't trigger the check here.
void awesome_f2();
"""

    test_dir = os.path.join(
        module_path, "..", "..", "test", "clang-tidy", "checkers", module
    )
    filename = os.path.normpath(
        os.path.join(test_dir, f"{check_name}.{test_extension}")
    )
    print(f"Creating {filename}...")
    with open(filename, "w", encoding="utf8", newline="\n") as test_file:
        test_file.write(template % fields)


def get_actual_filename(dirname: str, filename: str) -> str:
    """Return the path of *filename* in *dirname*, ignoring case if needed.

    An exact match is preferred; otherwise the first directory entry whose
    lower-cased name equals the lower-cased *filename* is used. Returns an
    empty string when *dirname* is not a directory or nothing matches.
    """
    if not os.path.isdir(dirname):
        return ""

    exact_path = os.path.join(dirname, filename)
    if os.path.isfile(exact_path):
        return exact_path

    wanted = filename.lower()
    found = next(
        (entry for entry in os.listdir(dirname) if entry.lower() == wanted), None
    )
    return "" if found is None else os.path.join(dirname, found)


# Recreates the list of checks in the docs/clang-tidy/checks directory.
def update_checks_list(clang_tidy_path: str) -> None:
    """Regenerate the check tables in ``docs/clang-tidy/checks/list.rst``.

    Everything in ``list.rst`` up to and including the first
    ``.. csv-table::`` line is kept. The remainder is replaced by a table of
    checks followed by a "Check aliases" table, both built from the ``.rst``
    files found in the sub-directories of the checks documentation directory.
    If no ``.. csv-table::`` line exists the file is rewritten unchanged.

    Args:
        clang_tidy_path: Directory of the clang-tidy sources. The
            documentation is looked up at ``../docs/clang-tidy/checks``
            relative to it.
    """
    docs_dir = os.path.join(clang_tidy_path, "../docs/clang-tidy/checks")
    filename = os.path.normpath(os.path.join(docs_dir, "list.rst"))
    # Read the content of the current list.rst file
    with open(filename, "r", encoding="utf8") as f:
        lines = f.readlines()
    # Get all existing docs
    doc_files = []
    for subdir in filter(
        lambda s: os.path.isdir(os.path.join(docs_dir, s)), os.listdir(docs_dir)
    ):
        for file in filter(
            methodcaller("endswith", ".rst"), os.listdir(os.path.join(docs_dir, subdir))
        ):
            doc_files.append((subdir, file))
    doc_files.sort()

    # We couldn't find the source file from the check name, so try to find the
    # class name that corresponds to the check in the module file.
    def filename_from_module(module_name: str, check_name: str) -> str:
        module_path = os.path.join(clang_tidy_path, module_name)
        if not os.path.isdir(module_path):
            return ""
        try:
            module_file = get_module_filename(module_path, module_name)
        except (IndexError, FileNotFoundError):
            # The directory has no *TidyModule.cpp; this lookup is only a
            # best-effort heuristic, so report "not found".
            return ""
        if not os.path.isfile(module_file):
            return ""
        # Sources are UTF-8; do not depend on the platform default encoding.
        with open(module_file, "r", encoding="utf8") as f:
            code = f.read()

        full_check_name = f"{module_name}-{check_name}"
        if (name_pos := code.find(f'"{full_check_name}"')) == -1:
            return ""
        if (stmt_end_pos := code.find(";", name_pos)) == -1:
            return ""
        # The statement starts after the previous ';' or, failing that, '{'.
        if (stmt_start_pos := code.rfind(";", 0, name_pos)) == -1 and (
            stmt_start_pos := code.rfind("{", 0, name_pos)
        ) == -1:
            return ""
        stmt = code[stmt_start_pos + 1 : stmt_end_pos]
        # The class name may be namespace-qualified (e.g. `bugprone::Foo`),
        # so ':' must be allowed inside the template argument.
        matches = re.search(r'registerCheck<([^>]*)>\(\s*"([^"]*)"\s*\)', stmt)
        if matches and matches[2] == full_check_name:
            class_name = matches[1]
            if "::" in class_name:
                # A qualified name points into a sibling module directory.
                parts = class_name.split("::")
                class_name = parts[-1]
                class_path = os.path.join(
                    clang_tidy_path, module_name, "..", *parts[0:-1]
                )
            else:
                class_path = os.path.join(clang_tidy_path, module_name)
            return get_actual_filename(class_path, f"{class_name}.cpp")

        return ""

    # Examine code looking for a c'tor definition to get the base class name.
    def get_base_class(code: str, check_file: str) -> str:
        check_class_name = os.path.splitext(os.path.basename(check_file))[0]
        ctor_pattern = rf"{check_class_name}\([^:]*\)\s*:\s*([A-Z][A-Za-z0-9]*Check)\("
        matches = re.search(rf"\s+{check_class_name}::{ctor_pattern}", code)

        # The constructor might be inline in the header.
        if not matches:
            header_file = f"{os.path.splitext(check_file)[0]}.h"
            if not os.path.isfile(header_file):
                return ""
            with open(header_file, encoding="utf8") as f:
                code = f.read()
            matches = re.search(rf" {ctor_pattern}", code)

        # Deriving directly from ClangTidyCheck carries no extra information.
        if matches and matches[1] != "ClangTidyCheck":
            return matches[1]
        return ""

    # Some simple heuristics to figure out if a check has an autofix or not.
    def has_fixits(code: str) -> bool:
        for needle in [
            "FixItHint",
            "ReplacementText",
            "fixit",
            "FixIt",
            "TransformerClangTidyCheck",
        ]:
            if needle in code:
                return True
        return False

    # Try to figure out if the check supports fixits. Returns ' "Yes"' (the
    # text of the "Offers fixes" column) or an empty string.
    def has_auto_fix(check_name: str) -> str:
        dirname, _, check_name = check_name.partition("-")

        check_file = get_actual_filename(
            os.path.join(clang_tidy_path, dirname),
            f"{get_camel_check_name(check_name)}.cpp",
        )
        if not os.path.isfile(check_file):
            # Some older checks don't end with 'Check.cpp'
            check_file = get_actual_filename(
                os.path.join(clang_tidy_path, dirname),
                f"{get_camel_name(check_name)}.cpp",
            )
            if not os.path.isfile(check_file):
                # Some checks aren't in a file based on the check name.
                check_file = filename_from_module(dirname, check_name)
                if not (check_file and os.path.isfile(check_file)):
                    return ""

        with open(check_file, encoding="utf8") as f:
            code = f.read()
            if has_fixits(code):
                return ' "Yes"'

        # The fix-its may be produced by a base class in the same module.
        if base_class := get_base_class(code, check_file):
            base_file = os.path.join(clang_tidy_path, dirname, f"{base_class}.cpp")
            if os.path.isfile(base_file):
                with open(base_file, encoding="utf8") as f:
                    code = f.read()
                    if has_fixits(code):
                        return ' "Yes"'

        return ""

    def detect_alias_target(check_name: str, content: str) -> Optional[str]:
        """Return the :doc: target for non-redirect alias pages.

        This recognizes pages that keep their own documentation content, but
        whose paragraph explicitly states that the current check is an
        alias of another check.
        """
        # Split on blank lines and collapse whitespace so that each paragraph
        # can be matched as a single line.
        paragraphs = [
            re.sub(r"\s+", " ", paragraph.strip())
            for paragraph in re.split(r"\n\s*\n", content)
            if paragraph.strip()
        ]

        self_alias = re.compile(
            r"^This check is an alias(?: of check| for)\b",
            re.IGNORECASE,
        )
        named_alias = re.compile(
            rf"^The\s+`?{re.escape(check_name)}(?:\s+check)?`?"
            rf"(?:\s+check)?\s+is\s+an\s+alias,?\s+please\s+see\b",
            re.IGNORECASE,
        )

        for paragraph in paragraphs:
            if self_alias.search(paragraph) or named_alias.search(paragraph):
                # Prefer a :doc: role, then fall back to a plain .html link.
                if match := re.search(r":doc:`[^`<]+?<([^>]+)>`", paragraph):
                    return match.group(1)
                if match := re.search(r"`[^`<]+?<(.+?)\.html(?:#[^>]+)?>`_", paragraph):
                    return match.group(1)
        return None

    # Returns the check name (empty for orphan pages, which are not listed)
    # and the alias target detected in the page, if any.
    def process_doc(doc_file: Tuple[str, str]) -> Tuple[str, Optional[str]]:
        check_name = f"{doc_file[0]}-{doc_file[1].replace('.rst', '')}"

        with open(os.path.join(docs_dir, *doc_file), "r", encoding="utf8") as doc:
            content = doc.read()

            if re.search(".*:orphan:.*", content):
                # Orphan page, don't list it.
                return "", None

            return check_name, detect_alias_target(check_name, content)

    # Formats the row of the checks table for a regular (non-alias) check.
    def format_link(doc_file: Tuple[str, str]) -> str:
        check_name, match = process_doc(doc_file)
        if not match and check_name and not check_name.startswith("clang-analyzer-"):
            return (
                f"   :doc:`{check_name} <{doc_file[0]}/{doc_file[1].replace('.rst', '')}>`,"
                f"{has_auto_fix(check_name)}\n"
            )
        else:
            return ""

    # Formats the row of the aliases table for an alias or a Clang Static
    # Analyzer check; returns an empty string for everything else.
    def format_link_alias(doc_file: Tuple[str, str]) -> str:
        check_name, match = process_doc(doc_file)
        is_clang_analyzer = check_name.startswith("clang-analyzer-")
        if not check_name or (not match and not is_clang_analyzer):
            return ""

        module = doc_file[0]
        check_file = doc_file[1].replace(".rst", "")
        if is_clang_analyzer:
            title = f"Clang Static Analyzer {check_file}"
            # Clang Static Analyzer aliases still need the external redirect
            # target so list.rst can link to the upstream analyzer docs.
            with open(os.path.join(docs_dir, *doc_file), "r", encoding="utf8") as doc:
                content = doc.read()
            redirect = re.search(
                r".*:http-equiv=refresh: \d+;URL=(.*).html(.*)", content
            )
            # Preserve the anchor in checkers.html from group 2.
            target = (
                "" if not redirect else f"{redirect.group(1)}.html{redirect.group(2)}"
            )
            autofix = ""
            ref_begin = ""
            ref_end = "_"
        else:
            # Match neighbour or current-directory doc targets.
            redirect_parts = re.search(r"^(?:\.\./([^/]+)/)?([^/]+)$", match)
            assert redirect_parts
            redirect_module = redirect_parts[1] or module
            title = f"{redirect_module}-{redirect_parts[2]}"
            target = f"{redirect_module}/{redirect_parts[2]}"
            autofix = has_auto_fix(title)
            ref_begin = ":doc:"
            ref_end = ""

        if target:
            # The checker is just a redirect.
            return (
                f"   :doc:`{check_name} <{module}/{check_file}>`, "
                f"{ref_begin}`{title} <{target}>`{ref_end},{autofix}\n"
            )

        # The checker is just an alias without redirect.
        return f"   :doc:`{check_name} <{module}/{check_file}>`, {title},{autofix}\n"

    print(f"Updating {filename}...")
    with open(filename, "w", encoding="utf8", newline="\n") as f:
        for line in lines:
            f.write(line)
            if line.strip() == ".. csv-table::":
                # We dump the checkers
                f.write('   :header: "Name", "Offers fixes"\n\n')
                f.writelines(map(format_link, doc_files))
                # and the aliases
                f.write("\nCheck aliases\n-------------\n\n")
                f.write(".. csv-table::\n")
                f.write('   :header: "Name", "Redirect", "Offers fixes"\n\n')
                f.writelines(map(format_link_alias, doc_files))
                # Everything after the first table is regenerated.
                break


# Adds documentation for the check.
def write_docs(module_path: str, module: str, check_name: str) -> None:
    """Create the documentation stub for the new check.

    The file is written to
    ``../../docs/clang-tidy/checks/<module>/<check_name>.rst`` relative to
    *module_path*: a title directive, the check name as a heading, and a
    FIXME placeholder.
    """
    check_name_dashes = f"{module}-{check_name}"
    # reST headings need an underline at least as long as the title.
    underline = "=" * len(check_name_dashes)
    contents = (
        f".. title:: clang-tidy - {check_name_dashes}\n"
        "\n"
        f"{check_name_dashes}\n"
        f"{underline}\n"
        "\n"
        "FIXME: Describe what patterns does the check detect and why. Give examples.\n"
    )

    filename = os.path.normpath(
        os.path.join(
            module_path, "../../docs/clang-tidy/checks/", module, f"{check_name}.rst"
        )
    )
    print(f"Creating {filename}...")
    with open(filename, "w", encoding="utf8", newline="\n") as doc:
        doc.write(contents)


def get_camel_name(check_name: str) -> str:
    """Convert a dash-separated check name to CamelCase.

    Each dash-separated part is passed through ``str.capitalize``, so the
    first character is upper-cased and the rest lower-cased.
    """
    return "".join(part.capitalize() for part in check_name.split("-"))


def get_camel_check_name(check_name: str) -> str:
    """Return the CamelCase form of *check_name* with a ``Check`` suffix."""
    camel_name = get_camel_name(check_name)
    return camel_name + "Check"


def main() -> None:
    """Parse the command line and generate the files for a new check.

    With ``--update-docs`` only the list of checks is regenerated. Otherwise
    the module's CMakeLists.txt and module source are updated, and a header,
    implementation, test, documentation stub and release-notes entry are
    created for the check.

    Raises:
        ValueError: If ``--standard`` contradicts ``--language``.
    """
    language_to_extension = {
        "c": "c",
        "c++": "cpp",
        "objc": "m",
        "objc++": "mm",
    }
    # Map each accepted --standard value to the LangOptions member that the
    # generated isLanguageVersionSupported() tests.
    cpp_language_to_requirements = {
        "c++98": "CPlusPlus",
        "c++11": "CPlusPlus11",
        "c++14": "CPlusPlus14",
        "c++17": "CPlusPlus17",
        "c++20": "CPlusPlus20",
        "c++23": "CPlusPlus23",
        "c++26": "CPlusPlus26",
    }
    c_language_to_requirements = {
        "c99": None,
        "c11": "C11",
        "c17": "C17",
        "c23": "C23",
        "c27": "C2Y",
    }
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--update-docs",
        action="store_true",
        help="just update the list of documentation files, then exit",
    )
    parser.add_argument(
        "--language",
        help="language to use for new check (defaults to c++)",
        choices=language_to_extension.keys(),
        default=None,
        metavar="LANG",
    )
    parser.add_argument(
        "--description",
        "-d",
        help="short description of what the check does",
        default="FIXME: Write a short description",
        type=str,
    )
    parser.add_argument(
        "--standard",
        help="Specify a specific version of the language",
        choices=list(
            itertools.chain(
                cpp_language_to_requirements.keys(), c_language_to_requirements.keys()
            )
        ),
        default=None,
    )
    parser.add_argument(
        "module",
        nargs="?",
        help="module directory under which to place the new tidy check (e.g., misc)",
    )
    parser.add_argument(
        "check", nargs="?", help="name of new tidy check to add (e.g. foo-do-the-stuff)"
    )
    args = parser.parse_args()

    if args.update_docs:
        update_checks_list(os.path.dirname(sys.argv[0]))
        return

    if not args.module or not args.check:
        print("Module and check must be specified.")
        parser.print_usage()
        return

    module = args.module
    check_name = args.check
    check_name_camel = get_camel_check_name(check_name)
    if check_name.startswith(module):
        print(
            f'Check name "{check_name}" must not start with the module "{module}". Exiting.'
        )
        return
    # All paths are resolved relative to the directory containing this script.
    clang_tidy_path = os.path.dirname(sys.argv[0])
    module_path = os.path.join(clang_tidy_path, module)

    if not adapt_cmake(module_path, check_name_camel):
        # Tell the user why nothing was generated instead of exiting silently.
        print(
            f"{check_name_camel}.cpp is already listed in "
            f"{os.path.join(module_path, 'CMakeLists.txt')}. Exiting."
        )
        return

    # Map module names to namespace names that don't conflict with widely used top-level namespaces.
    if module == "llvm":
        namespace = f"{module}_check"
    else:
        namespace = module

    description = args.description
    if not description.endswith("."):
        description += "."

    language = args.language

    # A language standard implies the language; reject contradictory options.
    if args.standard:
        if args.standard in cpp_language_to_requirements:
            if language and language != "c++":
                raise ValueError("C++ standard chosen when language is not C++")
            language = "c++"
        elif args.standard in c_language_to_requirements:
            if language and language != "c":
                raise ValueError("C standard chosen when language is not C")
            language = "c"

    if not language:
        language = "c++"

    language_restrict = None

    # Build the %-template for the isLanguageVersionSupported() condition;
    # write_header substitutes %(lang)s.
    if language == "c":
        language_restrict = "!%(lang)s.CPlusPlus"
        if extra := c_language_to_requirements.get(args.standard, None):
            language_restrict += f" && %(lang)s.{extra}"
    elif language == "c++":
        language_restrict = (
            f"%(lang)s.{cpp_language_to_requirements.get(args.standard, 'CPlusPlus')}"
        )
    elif language in ["objc", "objc++"]:
        language_restrict = "%(lang)s.ObjC"
    else:
        raise ValueError(f"Unsupported language '{language}' was specified")

    write_header(
        module_path,
        module,
        namespace,
        check_name,
        check_name_camel,
        description,
        language_restrict,
    )
    write_implementation(module_path, module, namespace, check_name_camel)
    adapt_module(module_path, module, check_name, check_name_camel)
    add_release_notes(module_path, module, check_name, description)
    test_extension = language_to_extension[language]
    write_test(module_path, module, check_name, test_extension, args.standard)
    write_docs(module_path, module, check_name)
    update_checks_list(clang_tidy_path)
    print("Done. Now it's your turn!")


# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()