trezor-firmware/tools/pyright_tool.py

#!/usr/bin/env python3
"""
Wrapper around pyright type checking to allow for easy ignore of specific error messages.
Thanks to it the `# type: ignore` does not affect the whole line,
so other problems at the same line cannot be masked by it.

Features:
- ignores specific pyright errors based on substring or regex
- reports empty `# type: ignore`s (without ignore reason in `[]`)
- reports unused `# type: ignore`s (for example after pyright is updated)
- allows for ignoring some errors in the whole file - see `FILE_SPECIFIC_IGNORES` variable
- allows for error aliases - see `ALIASES` variable

Usage:
- there are multiple options how to ignore/silence a pyright error:
    1 - "# type: ignore [<error_substring>]"
        - put it as a comment to the line we want to ignore
        - "# type: ignore [<error1>;;<error2>;;...]" if there are more than one errors on that line
        - also regex patterns are valid substrings
    2 - "# pyright: off" / "# pyright: on"
        - all errors in block of code between these marks will be ignored
    3 - FILE_SPECIFIC_IGNORES
        - ignore specific rules (defined by pyright) or error substrings in the whole file
    4 - ALIASES
        - create an alias for a common error and use is with option 1 - "# type: ignore [<error_alias>]"

Running the script:
- see all script argument by calling `python pyright_tool.py --help`
- only directories with existing `pyrightconfig.json` can be tested - see `--dir` flag

Simplified program flow (as it happens in PyrightTool.run()):
- extract and validate pyright config data from pyrightconfig.json
- collect all the pyright errors by actually running the pyright itself
- extract type-ignore information for all the files pyright was analyzing
- loop through all the pyright errors and try to match them against all the type-ignore rules
- if there are some unmatched errors, report them and exit with nonzero value
- also report unused ignores and other inconsistencies
"""

from __future__ import annotations

import argparse
import json
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Final, TypedDict, TYPE_CHECKING

if TYPE_CHECKING:
    LineIgnores = list[LineIgnore]

    FileIgnores = dict[str, LineIgnores]
    FileSpecificIgnores = dict[str, list[FileSpecificIgnore]]

    PyrightOffIgnores = list[PyrightOffIgnore]
    FilePyrightOffIgnores = dict[str, PyrightOffIgnores]


class RangeDetail(TypedDict):
    line: int
    character: int


class Range(TypedDict):
    start: RangeDetail
    end: RangeDetail


class Error(TypedDict):
    file: str
    severity: str
    message: str
    range: Range
    rule: str


class Summary(TypedDict):
    filesAnalyzed: int
    errorCount: int
    warningCount: int
    informationCount: int
    timeInSec: float


class PyrightResults(TypedDict):
    version: str
    time: str
    generalDiagnostics: list[Error]
    summary: Summary


@dataclass
class IgnoreStatement:
    substring: str
    already_used: bool = False


@dataclass
class LineIgnore:
    line_no: int
    ignore_statements: list[IgnoreStatement]


@dataclass
class FileSpecificIgnore:
    rule: str = ""
    substring: str = ""
    already_used: bool = False

    def __post_init__(self) -> None:
        if self.rule and self.substring:
            raise ValueError("Only one of rule|substring should be set")


@dataclass
class PyrightOffIgnore:
    start_line: int
    end_line: int
    already_used: bool = False


parser = argparse.ArgumentParser()
parser.add_argument(
    "--dev", action="store_true", help="Creating the error file and not deleting it"
)
parser.add_argument(
    "--test",
    action="store_true",
    help="Reusing existing error file and not deleting it",
)
parser.add_argument("--log", action="store_true", help="Log details")
parser.add_argument(
    "--dir",
    help="Directory which to test, relative to the repository root. When empty, taking the directory of this file.",
    default="",
)
args = parser.parse_args()

if args.dev:
    should_generate_error_file = True
    should_delete_error_file = False
    print("Running in dev mode, creating the file and not deleting it")
elif args.test:
    should_generate_error_file = False
    should_delete_error_file = False
    print("Running in test mode, will reuse existing error file")
else:
    should_generate_error_file = True
    should_delete_error_file = True

SHOULD_GENERATE_ERROR_FILE = should_generate_error_file
SHOULD_DELETE_ERROR_FILE = should_delete_error_file
SHOULD_LOG = args.log

if args.dir:
    # Need to change the os directory to find all the files correctly
    # Repository root + the wanted directory.
    HERE = Path(__file__).resolve().parent.parent / args.dir
    if not HERE.is_dir():
        raise RuntimeError(f"Could not find directory {args.dir} under {HERE}")
    os.chdir(HERE)
else:
    # Directory of this file
    HERE = Path(__file__).resolve().parent

# TODO: move into a JSON or other config file
# Files need to have a relative location to the directory being tested
# Example (when checking `python` directory):
# "tools/helloworld.py": [
#     FileSpecificIgnore(rule="reportMissingParameterType"),
#     FileSpecificIgnore(substring="cannot be assigned to parameter"),
# ],
FILE_SPECIFIC_IGNORES: FileSpecificIgnores = {}


# Allowing for more readable ignore of common problems, with an easy-to-understand alias
ALIASES: dict[str, str] = {
    "awaitable-is-generator": 'Return type of generator function must be "Generator" or "Iterable"',
    "obscured-by-same-name": "is obscured by a declaration of the same name",
    "int-into-enum": 'Expression of type "int.*" cannot be assigned to return type ".*"',
}


class PyrightTool:
    ON_PATTERN: Final = "# pyright: on"
    OFF_PATTERN: Final = "# pyright: off"
    IGNORE_PATTERN: Final = "# type: ignore"
    IGNORE_DELIMITER: Final = ";;"

    original_pyright_results: PyrightResults
    all_files_to_check: set[str]
    all_pyright_ignores: FileIgnores
    pyright_off_ignores: FilePyrightOffIgnores
    real_errors: list[Error]
    unused_ignores: list[str]
    inconsistencies: list[str] = []

    def __init__(
        self,
        pyright_config_file: str | Path,
        *,
        file_specific_ignores: FileSpecificIgnores | None = None,
        aliases: dict[str, str] | None = None,
        error_file: str | Path = "temp_error_file.json",
        should_generate_error_file: bool = True,
        should_delete_error_file: bool = True,
        should_log: bool = False,
    ) -> None:
        self.pyright_config_file = pyright_config_file
        self.file_specific_ignores = file_specific_ignores or {}
        self.aliases = aliases or {}
        self.error_file = error_file
        self.should_generate_error_file = should_generate_error_file
        self.should_delete_error_file = should_delete_error_file
        self.should_log = should_log

        self.count_of_ignored_errors = 0

        self.check_input_correctness()

    def check_input_correctness(self) -> None:
        """Verify the input data structures are correct."""
        # Checking for correct file_specific_ignores structure
        for file, ignores in self.file_specific_ignores.items():
            for ignore in ignores:
                if not isinstance(ignore, FileSpecificIgnore):
                    raise RuntimeError(
                        "All items of file_specific_ignores must be FileSpecificIgnore classes. "
                        f"Got {ignore} - type {type(ignore)}"
                    )
            # Also putting substrings at the beginning of ignore-lists, so they are matched before rules
            # (Not to leave them potentially unused when error would be matched by a rule instead)
            self.file_specific_ignores[file].sort(
                key=lambda x: x.substring, reverse=True
            )

        # Checking for correct aliases (dict[str, str] type)
        for alias, full_substring in self.aliases.items():
            if not isinstance(alias, str) or not isinstance(full_substring, str):
                raise RuntimeError(
                    "All alias keys and values must be strings. "
                    f"Got {alias} (type {type(alias)}), {full_substring} (type {type(full_substring)}"
                )

    def run(self) -> None:
        """Main function, putting together all logic and evaluating result."""
        self.pyright_config_data = self.get_and_validate_pyright_config_data()

        self.original_pyright_results = self.get_original_pyright_results()

        self.all_files_to_check = self.get_all_files_to_check()
        self.all_pyright_ignores = self.get_all_pyright_ignores()
        self.pyright_off_ignores = self.get_pyright_off_ignores()

        self.real_errors = self.get_all_real_errors()
        self.unused_ignores = self.get_unused_ignores()

        self.evaluate_final_result()

    def evaluate_final_result(self) -> None:
        """Reporting results to the user/CI (printing stuff, deciding exit value)."""
        print(
            f"\nIgnored {self.count_of_ignored_errors} custom-defined errors "
            f"from {len(self.all_pyright_ignores)} files."
        )

        if self.unused_ignores:
            print("\nWARNING: there are unused ignores!")
            for unused_ignore in self.unused_ignores:
                print(unused_ignore)

        if self.inconsistencies:
            print("\nWARNING: there are inconsistencies!")
            for inconsistency in self.inconsistencies:
                print(inconsistency)

        if not self.real_errors:
            print("\nSUCCESS: Everything is fine!")
            if self.unused_ignores or self.inconsistencies:
                print("But we have unused ignores or inconsistencies!")
                sys.exit(2)
            else:
                sys.exit(0)
        else:
            print("\nERROR: We have issues!\n")
            for error in self.real_errors:
                print(self.get_human_readable_error_string(error))
            print(f"Found {len(self.real_errors)} issues above")
            if self.unused_ignores or self.inconsistencies:
                print("And we have unused ignores or inconsistencies!")
            sys.exit(1)

    def get_and_validate_pyright_config_data(self) -> dict[str, Any]:
        """Verify that pyrightconfig exists and has correct data."""
        if not os.path.isfile(self.pyright_config_file):
            raise RuntimeError(
                f"Pyright config file under {self.pyright_config_file} does not exist! "
                "Tool relies on its existence, please create it."
            )

        try:
            config_data = json.loads(open(self.pyright_config_file, "r").read())
        except json.decoder.JSONDecodeError as err:
            raise RuntimeError(
                f"Pyright config under {self.pyright_config_file} does not contain valid JSON! Err: {err}"
            ) from None

        # enableTypeIgnoreComments MUST be set to False, otherwise the "type: ignore"s
        # will affect the original pyright result - and we need it to grab all the errors
        # so we can handle them on our own
        if (
            "enableTypeIgnoreComments" not in config_data
            or config_data["enableTypeIgnoreComments"]
        ):
            raise RuntimeError(
                f"Please set '\"enableTypeIgnoreComments\": true' in {self.pyright_config_file}. "
                "Otherwise the tool will not work as expected."
            )

        return config_data

    def get_original_pyright_results(self) -> PyrightResults:
        """Extract all information from pyright.

        `pyright --outputjson` will return all the results in
        nice JSON format with `generalDiagnostics` array storing
        all the errors - schema described in PyrightResults
        """
        if self.should_generate_error_file:
            cmd = f"pyright -p {self.pyright_config_file} --outputjson > {self.error_file}"
            exit_code = subprocess.call(cmd, shell=True)
            # Checking if there was no non-type-checking error when running the above command
            # Exit code 0 = all fine, no type-checking issues in pyright
            # Exit code 1 = pyright has found some type-checking issues (expected)
            # All other exit codes mean something non-type-related got wrong (or pyright was not found)
            # https://github.com/microsoft/pyright/blob/main/docs/command-line.md#pyright-exit-codes
            if exit_code not in (0, 1):
                raise RuntimeError(
                    f"Running '{cmd}' produced a non-expected exit code (see output above)."
                )

            if not os.path.isfile(self.error_file):
                raise RuntimeError(
                    f"Pyright error file under {self.error_file} was not generated by running '{cmd}'."
                )

        try:
            pyright_results: PyrightResults = json.loads(
                open(self.error_file, "r").read()
            )
        except FileNotFoundError:
            raise RuntimeError(
                f"Error file under {self.error_file} does not exist!"
            ) from None
        except json.decoder.JSONDecodeError as err:
            raise RuntimeError(
                f"Error file under {self.error_file} does not contain valid JSON! Err: {err}"
            ) from None

        if self.should_delete_error_file:
            os.remove(self.error_file)

        return pyright_results

    def get_all_real_errors(self) -> list[Error]:
        """Analyze all pyright errors and discard all that should be ignored.

        Ignores can be different:
        - as per "# type: ignore [<error_substring>]" comment
        - as per "file_specific_ignores"
        - as per "# pyright: off" mark
        """
        real_errors: list[Error] = []
        for error in self.original_pyright_results["generalDiagnostics"]:
            # Special handling of cycle import issues, which have different format
            if "range" not in error:
                error["range"] = {"start": {"line": 0}}
                error["rule"] = "cycleImport"
                real_errors.append(error)
                continue

            file_path = error["file"]
            error_message = error["message"]
            line_no = error["range"]["start"]["line"]

            # Checking for "# type: ignore [<error_substring>]" comment
            if self.should_ignore_per_inline_substring(
                file_path, error_message, line_no
            ):
                self.count_of_ignored_errors += 1
                self.log_ignore(error, "error substring matched")
                continue

            # Checking in file_specific_ignores
            if self.should_ignore_file_specific_error(file_path, error):
                self.count_of_ignored_errors += 1
                self.log_ignore(error, "file specific error")
                continue

            # Checking for "# pyright: off" mark
            if self.is_line_in_pyright_off_block(file_path, line_no):
                self.count_of_ignored_errors += 1
                self.log_ignore(error, "pyright disabled for this line")
                continue

            real_errors.append(error)

        return real_errors

    def get_all_files_to_check(self) -> set[str]:
        """Get all files to be analyzed by pyright, based on its config."""
        all_files: set[str] = set()

        if "include" in self.pyright_config_data:
            for dir_or_file in self.pyright_config_data["include"]:
                for file in self.get_all_py_files_recursively(dir_or_file):
                    all_files.add(file)
        else:
            # "include" is missing, we should analyze all files in current dir
            for file in self.get_all_py_files_recursively("."):
                all_files.add(file)

        if "exclude" in self.pyright_config_data:
            for dir_or_file in self.pyright_config_data["exclude"]:
                for file in self.get_all_py_files_recursively(dir_or_file):
                    if file in all_files:
                        all_files.remove(file)

        return all_files

    @staticmethod
    def get_all_py_files_recursively(dir_or_file: str) -> set[str]:
        """Return all python files in certain directory (or the file itself)."""
        if os.path.isfile(dir_or_file):
            return set(str(HERE / dir_or_file))

        all_files: set[str] = set()
        for root, _, files in os.walk(dir_or_file):
            for file in files:
                if file.endswith(".py"):
                    all_files.add(str(HERE / os.path.join(root, file)))

        return all_files

    def get_all_pyright_ignores(self) -> FileIgnores:
        """Get ignore information from all the files to be analyzed."""
        file_ignores: FileIgnores = {}
        for file in self.all_files_to_check:
            ignores = self.get_inline_type_ignores_from_file(file)
            if ignores:
                file_ignores[file] = ignores

        return file_ignores

    def get_pyright_off_ignores(self) -> FilePyrightOffIgnores:
        """Get ignore information based on `# pyright: on/off` marks."""
        pyright_off_ignores: FilePyrightOffIgnores = {}
        for file in self.all_files_to_check:
            ignores = self.find_pyright_off_from_file(file)
            if ignores:
                pyright_off_ignores[file] = ignores

        return pyright_off_ignores

    def get_unused_ignores(self) -> list[str]:
        """Evaluate if there are no ignores not matched by pyright errors."""
        unused_ignores: list[str] = []

        # type: ignore
        for file, file_ignores in self.all_pyright_ignores.items():
            for line_ignore in file_ignores:
                for ignore_statement in line_ignore.ignore_statements:
                    if not ignore_statement.already_used:
                        unused_ignores.append(
                            f"File {file}:{line_ignore.line_no + 1} has unused ignore. "
                            f"Substring: {ignore_statement.substring}"
                        )

        # Pyright: off
        for file, file_ignores in self.pyright_off_ignores.items():
            for off_ignore in file_ignores:
                if not off_ignore.already_used:
                    unused_ignores.append(
                        f"File {file} has unused # pyright: off ignore between lines "
                        f"{off_ignore.start_line + 1} and {off_ignore.end_line + 1}."
                    )

        # File-specific
        for file, file_ignores in self.file_specific_ignores.items():
            for ignore_object in file_ignores:
                if not ignore_object.already_used:
                    if ignore_object.substring:
                        unused_ignores.append(
                            f"File {file} has unused specific ignore substring. "
                            f"Substring: {ignore_object.substring}"
                        )
                    elif ignore_object.rule:
                        unused_ignores.append(
                            f"File {file} has unused specific ignore rule. "
                            f"Rule: {ignore_object.rule}"
                        )

        return unused_ignores

    def should_ignore_per_inline_substring(
        self, file: str, error_message: str, line_no: int
    ) -> bool:
        """Check if line should be ignored based on inline substring/regex."""
        if file not in self.all_pyright_ignores:
            return False

        for ignore_index, ignore in enumerate(self.all_pyright_ignores[file]):
            if line_no == ignore.line_no:
                for substring_index, ignore_statement in enumerate(
                    ignore.ignore_statements
                ):
                    # Supporting both text substrings and regex patterns
                    if ignore_statement.substring in error_message or re.search(
                        ignore_statement.substring, error_message
                    ):
                        # Marking this ignore to be used (so we can identify unused ignores)
                        self.all_pyright_ignores[file][ignore_index].ignore_statements[
                            substring_index
                        ].already_used = True
                        return True

        return False

    def should_ignore_file_specific_error(self, file: str, error: Error) -> bool:
        """Check if line should be ignored based on file-specific ignores."""
        if file not in self.file_specific_ignores:
            return False

        for ignore_object in self.file_specific_ignores[file]:
            if ignore_object.rule:
                if error["rule"] == ignore_object.rule:
                    ignore_object.already_used = True
                    return True
            elif ignore_object.substring:
                # Supporting both text substrings and regex patterns
                if ignore_object.substring in error["message"] or re.search(
                    ignore_object.substring, error["message"]
                ):
                    ignore_object.already_used = True
                    return True

        return False

    def is_line_in_pyright_off_block(self, file: str, line_no: int) -> bool:
        """Check if line should be ignored based on `# pyright: off` mark."""
        if file not in self.pyright_off_ignores:
            return False

        for off_ignore in self.pyright_off_ignores[file]:
            if off_ignore.start_line < line_no < off_ignore.end_line:
                off_ignore.already_used = True
                return True

        return False

    def find_pyright_off_from_file(self, file: str) -> PyrightOffIgnores:
        """Get sections in file to be ignored based on `# pyright: off`."""
        pyright_off_ignores: PyrightOffIgnores = []
        with open(file, "r") as f:
            pyright_off = False
            start_line = 0
            index = 0
            for index, line in enumerate(f):
                if self.OFF_PATTERN in line and not pyright_off:
                    start_line = index
                    pyright_off = True
                elif self.ON_PATTERN in line and pyright_off:
                    pyright_off_ignores.append(PyrightOffIgnore(start_line, index))
                    pyright_off = False

            if pyright_off:
                pyright_off_ignores.append(PyrightOffIgnore(start_line, index))

        return pyright_off_ignores

    def get_inline_type_ignores_from_file(self, file: str) -> LineIgnores:
        """Get all type ignore lines and statements from a certain file."""
        ignores: LineIgnores = []
        with open(file, "r") as f:
            for index, line in enumerate(f):
                if self.IGNORE_PATTERN in line:
                    ignore_statements = self.get_ignore_statements(line)
                    if not ignore_statements:
                        self.inconsistencies.append(
                            f"There is an empty `{self.IGNORE_PATTERN}` in {file}:{index+1}"
                        )
                    else:
                        ignores.append(LineIgnore(index, ignore_statements))

        return ignores

    def get_ignore_statements(self, line: str) -> list[IgnoreStatement]:
        """Extract error substrings to be ignored from a certain line."""
        # Extracting content of [error_substring(s)] after the ignore comment
        ignore_part = line.split(self.IGNORE_PATTERN, maxsplit=2)[1]
        ignore_content = re.search(r"\[(.*)\]", ignore_part)

        # We should not be using empty `# type: ignore` without content in []
        # Notifying the parent function that we should do something about it
        if not ignore_content:
            return []

        # There might be more than one substring
        statement_substrings = ignore_content.group(1).split(self.IGNORE_DELIMITER)

        # When finding aliases, replacing them with a real substring
        statement_substrings = [self.aliases.get(ss, ss) for ss in statement_substrings]

        return [IgnoreStatement(substr) for substr in statement_substrings]

    def log_ignore(self, error: Error, reason: str) -> None:
        """Print the action of ignoring certain error into the console."""
        if self.should_log:
            err = self.get_human_readable_error_string(error)
            print(f"\nError ignored. Reason: {reason}.\nErr: {err}")

    @staticmethod
    def get_human_readable_error_string(error: Error) -> str:
        """Transform error object to a string readable by human."""
        file = error["file"]
        message = error["message"]
        rule = error.get("rule", "No specific rule")
        line = error["range"]["start"]["line"]

        # Need to add +1 to the line, as it is zero-based index
        return f"{file}:{line + 1}: - error: {message} ({rule})\n"


if __name__ == "__main__":
    tool = PyrightTool(
        pyright_config_file=HERE / "pyrightconfig.json",
        file_specific_ignores={
            str(HERE / k): v for k, v in FILE_SPECIFIC_IGNORES.items()
        },
        aliases=ALIASES,
        error_file="errors_for_pyright_temp.json",
        should_generate_error_file=SHOULD_GENERATE_ERROR_FILE,
        should_delete_error_file=SHOULD_DELETE_ERROR_FILE,
        should_log=SHOULD_LOG,
    )
    tool.run()