trezor-firmware/tests/ui_tests/common.py

from __future__ import annotations

import hashlib
import json
import re
import shutil
import typing as t
import warnings
from copy import deepcopy
from dataclasses import asdict, dataclass, field
from difflib import SequenceMatcher
from functools import cached_property
from itertools import zip_longest
from pathlib import Path

import pytest
from PIL import Image
from typing_extensions import Self

from trezorlib.debuglink import TrezorClientDebugLink as Client

# Directory that contains this module; every other path below is relative to it.
UI_TESTS_DIR = Path(__file__).resolve().parent
# Root of the per-test screenshot directories (see TestCase.dir).
SCREENS_DIR = UI_TESTS_DIR / "screens"
# Created on demand by _rename_records().
IMAGES_DIR = SCREENS_DIR / "all_images"
# JSON file with the expected hashes; read by get_fixtures() and the default
# destination of write_fixtures().
FIXTURES_FILE = UI_TESTS_DIR / "fixtures.json"

# fixtures.json is structured as follows:
# {
#   "model": {
#     "group": {
#       "test_name": "hash",
#       ...
#  }}}...
# In other words:
# FixturesType = dict[<model>, dict[<group>, dict[<test_name>, <hash>]]]
FixturesType = t.NewType("FixturesType", "dict[str, dict[str, dict[str, str]]]")

# In-memory cache of the parsed fixtures file. Starts empty, is filled lazily by
# get_fixtures() and reset to empty again by write_fixtures().
FIXTURES: FixturesType = FixturesType({})


def get_fixtures() -> FixturesType:
    """Return the expected-hash fixtures, loading them lazily.

    The parsed content of FIXTURES_FILE is stored in the module-level FIXTURES
    cache. The file is read whenever the cache is empty and the file exists;
    in every other case the current cache value is returned unchanged.
    """
    global FIXTURES
    cache_is_empty = not FIXTURES
    if cache_is_empty and FIXTURES_FILE.exists():
        raw_json = FIXTURES_FILE.read_text()
        FIXTURES = FixturesType(json.loads(raw_json))
    return FIXTURES


def prepare_fixtures(
    results: t.Iterable[TestResult],
    remove_missing: bool = False,
) -> tuple[FixturesType, set[TestCase]]:
    """Compute the new contents of fixtures.json without writing anything.

    The actual hashes of `results` are merged into a deep copy of the currently
    loaded fixtures, so the cached fixtures themselves are not modified.

    With `remove_missing` set, every model/group that appears in `results` is
    replaced wholesale: entries that were stored for that model/group but have
    no counterpart in `results` are dropped and reported. Model/groups that do
    not appear in `results` are never touched.

    Returns a tuple of the merged fixtures and the set of dropped entries
    (always empty when `remove_missing` is false). The dropped entries are
    TestCase objects whose name is the stored fixture key.
    """
    # hashes of the supplied results, keyed by (model, group)
    hashes_by_group: dict[tuple[str, str], dict[str, str]] = {}
    for res in results:
        key = (res.test.model, res.test.group)
        hashes_by_group.setdefault(key, {})[res.test.fixtures_name] = res.actual_hash

    dropped: set[TestCase] = set()

    # overlay the fresh hashes on top of a private copy of the stored fixtures
    merged = deepcopy(get_fixtures())
    for (model, group), fresh in hashes_by_group.items():
        stored = merged.setdefault(model, {}).setdefault(group, {})
        if remove_missing:
            # anything stored but not re-recorded is considered missing
            dropped.update(
                TestCase(model, group, test) for test in stored.keys() - fresh.keys()
            )
            stored.clear()
        stored.update(fresh)

    return merged, dropped


def write_fixtures(
    results: t.Iterable[TestResult],
    remove_missing: bool = False,
    dest: Path = FIXTURES_FILE,
) -> None:
    """Merge `results` into the fixtures and write them to `dest` as JSON.

    The merge itself is done by prepare_fixtures(); the set of missing tests it
    reports is ignored here. The output has sorted keys, uses `indent=0` and
    ends with a trailing newline.
    """
    global FIXTURES
    fixtures, _missing = prepare_fixtures(results, remove_missing)
    serialized = json.dumps(fixtures, indent=0, sort_keys=True)
    dest.write_text(serialized + "\n")
    # empty the cache so that the next get_fixtures() call reads from disk again
    FIXTURES = FixturesType({})


def _rename_records(screen_path: Path) -> None:
    """Rename all entries of `screen_path` to zero-padded sequential PNG names.

    Entries are processed in sorted order and renamed to `00000000.png`,
    `00000001.png`, ... in place. IMAGES_DIR is created first if it does not
    exist yet.
    """
    IMAGES_DIR.mkdir(exist_ok=True)
    # take a sorted snapshot of the directory before any renaming starts
    records = sorted(screen_path.iterdir())
    for position, old_path in enumerate(records):
        new_name = f"{position:08}.png"
        old_path.replace(screen_path / new_name)


def screens_and_hashes(screen_path: Path) -> tuple[list[Path], list[str]]:
    """Return the sorted entries of `screen_path` and their image hashes.

    The two returned lists are index-aligned: the n-th hash belongs to the n-th
    path. A directory that does not exist yields two empty lists.
    """
    if not screen_path.exists():
        return [], []

    paths = sorted(screen_path.iterdir())
    hashes = [_get_image_hash(screenshot) for screenshot in paths]
    return paths, hashes


def _get_image_hash(png_file: Path) -> str:
    """Return the hex SHA-256 digest of the decoded pixel bytes of `png_file`."""
    pixel_bytes = _get_bytes_from_png(png_file)
    digest = hashlib.sha256(pixel_bytes)
    return digest.hexdigest()


def _get_bytes_from_png(png_file: Path) -> bytes:
    """Return the decoded pixel data of a PNG file as raw bytes.

    Hashing the decoded pixels instead of the file itself is necessary because
    Linux and Mac use different PNG encoding libraries, and the hashes must be
    the same on both platforms.
    """
    image = Image.open(str(png_file))
    return image.tobytes()


def _hash_files(path: Path) -> str:
    """Return one hex SHA-256 digest covering every image in `path`.

    The decoded pixel bytes of all directory entries are fed into a single
    hasher in sorted order, so the result depends on both the content and the
    ordering of the screenshots.
    """
    digest = hashlib.sha256()
    for screenshot in sorted(path.iterdir()):
        digest.update(_get_bytes_from_png(screenshot))
    return digest.hexdigest()


def get_last_call_test_result(request: pytest.FixtureRequest) -> bool | None:
    """Return whether the test's call phase passed, or None if it is unknown.

    The outcome is read from the `rep_call` attribute of the test node. If the
    test did not finish (e.g. it was interrupted by Ctrl+C),
    pytest_runtest_makereport did not create that attribute and None is
    returned.
    """
    node = request.node
    try:
        call_report = node.rep_call
    except AttributeError:
        return None
    return call_report.passed  # type: ignore


def _get_test_name_and_group(node_id: str) -> tuple[str, str]:
    """Derive a filesystem-friendly test name and the group from a pytest node id.

    The node id is split at the first "::" into the test file path and the
    item id. For a path such as tests/device_tests/bitcoin/test_signtx.py the
    second component ("device_tests") is the group and the remaining components
    are joined with dashes to prefix the name.

    Returns a `(name, group)` tuple. Names longer than 100 characters are cut
    and suffixed with a short hash so that they stay unique.
    """
    file_part, item_part = node_id.split("::", maxsplit=1)
    assert file_part.endswith(".py")

    # tests / device_tests / bitcoin / test_signtx.py
    components = file_part.split("/")
    _tests, group_name, *path_in_group = components

    # collapse every "::<anything>::" section of the item id into a single dash
    item_part = re.sub(r"::.*?::", "-", item_part)

    full_name = ("-".join(path_in_group) + "::" + item_part).replace("/", "-")

    # The item name is usually the function name, but with parametrization the
    # parameters are part of it too. Some of them are very long (tx hashes etc.)
    # and would exceed the maximum allowable filename length, so the name is
    # capped at 100 characters: the first 91, a dash, and 8 hex digits of the
    # SHA-256 of the full name as a differentiator.
    if len(full_name) > 100:
        differentiator = hashlib.sha256(full_name.encode()).hexdigest()
        full_name = f"{full_name[:91]}-{differentiator[:8]}"

    return full_name, group_name


def screens_diff(
    expected_hashes: list[str], actual_hashes: list[str]
) -> t.Iterator[tuple[str | None, str | None]]:
    """Yield aligned `(expected, actual)` hash pairs for two screen sequences.

    Screens present on only one side are paired with None on the other side.
    """
    matcher = SequenceMatcher(None, expected_hashes, actual_hashes, autojunk=False)
    # get_opcodes() returns an ordered list of instructions for turning
    # `expected` into `actual`, for example:
    # [('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 3), ('equal', 2, 6, 3, 7)]
    # which describes the situation when:
    # - the first screen is the same for both
    # - the second screen has changed and there is a new third screen
    # - the rest is the same
    for _tag, exp_start, exp_end, act_start, act_end in matcher.get_opcodes():
        # The tag ("replace", "delete", "equal" or "insert") is not needed:
        # zipping the two sub-slices keeps equal runs aligned and pads the
        # shorter side with None at deletion or insertion points.
        yield from zip_longest(
            expected_hashes[exp_start:exp_end],
            actual_hashes[act_start:act_end],
        )


@dataclass(frozen=True)
class TestCase:
    """Immutable identity of one UI test and the paths of its artifacts.

    All paths are derived from SCREENS_DIR and the `id` of the test case.
    """

    # model identifier, built as "T<model>" by build()
    model: str
    # test group, as returned by _get_test_name_and_group()
    group: str
    # test name, as returned by _get_test_name_and_group()
    name: str

    @classmethod
    def build(cls, client: Client, request: pytest.FixtureRequest) -> Self:
        """Create a test case from the client's model and the pytest node id."""
        test_name, test_group = _get_test_name_and_group(request.node.nodeid)
        model_label = f"T{client.features.model}"
        return cls(model=model_label, group=test_group, name=test_name)

    @property
    def id(self) -> str:
        """Identifier combining model, group and name; used as directory name."""
        return "{}-{}-{}".format(self.model, self.group, self.name)

    @property
    def fixtures_name(self) -> str:
        """Key under which the hash of this test is stored in the fixtures."""
        return "{}_{}".format(self.model, self.name)

    @property
    def dir(self) -> Path:
        """Directory holding every artifact of this test case."""
        return SCREENS_DIR.joinpath(self.id)

    @property
    def screen_text_file(self) -> Path:
        """Path of the `screens.txt` file inside the test directory."""
        return self.dir.joinpath("screens.txt")

    @property
    def actual_dir(self) -> Path:
        """Directory with the screenshots of the current run."""
        return self.dir.joinpath("actual")

    @cached_property
    def actual_screens(self) -> tuple[list[Path], list[str]]:
        """Paths and hashes of the current screenshots, computed once.

        The screenshots are renamed to sequential names before being listed.
        """
        current_dir = self.actual_dir
        _rename_records(current_dir)
        return screens_and_hashes(current_dir)

    @property
    def recorded_dir(self) -> Path:
        """Directory with the recorded (expected) screenshots."""
        return self.dir.joinpath("recorded")

    @cached_property
    def recorded_screens(self) -> tuple[list[Path], list[str]]:
        """Paths and hashes of the recorded screenshots, computed once."""
        return screens_and_hashes(self.recorded_dir)

    def build_result(self, request: pytest.FixtureRequest) -> TestResult:
        """Create the TestResult of the current run and save its metadata."""
        # the screenshots must carry their final names before they are hashed
        _rename_records(self.actual_dir)
        outcome = get_last_call_test_result(request)
        combined_hash = _hash_files(self.actual_dir)
        _paths, screen_hashes = self.actual_screens
        result = TestResult(
            test=self,
            passed=outcome,
            actual_hash=combined_hash,
            images=screen_hashes,
        )
        result.save_metadata()
        return result


@dataclass
class TestResult:
    """Outcome of one UI test run, as persisted in `metadata.json`."""

    # the test case this result belongs to
    test: TestCase
    # outcome of the test's call phase; None when it is unknown
    passed: bool | None
    # combined hash of the screenshots of this run
    actual_hash: str
    # hashes of the individual screenshots of this run
    images: list[str]
    # hash expected by the fixtures; looked up automatically when left as None
    expected_hash: str | None = None

    def __post_init__(self) -> None:
        """Fill in `expected_hash` from the fixtures when it was not given."""
        if self.expected_hash is not None:
            return
        per_model = get_fixtures().get(self.test.model, {})
        per_group = per_model.get(self.test.group, {})
        self.expected_hash = per_group.get(self.test.fixtures_name)

    def save_metadata(self) -> None:
        """Write this result as JSON to `metadata.json` in the test directory."""
        payload = json.dumps(asdict(self), indent=2, sort_keys=True)
        target = self.test.dir / "metadata.json"
        target.write_text(payload + "\n")

    def succeeded_in_ui_comparison(self) -> bool:
        """Tell whether the actual hash equals the expected hash."""
        return self.expected_hash == self.actual_hash

    @classmethod
    def load(cls, testdir: Path) -> Self:
        """Recreate a result from the `metadata.json` stored in `testdir`."""
        metadata = json.loads((testdir / "metadata.json").read_text())
        test_info = metadata["test"]
        test = TestCase(
            model=test_info["model"],
            group=test_info["group"],
            name=test_info["name"],
        )
        return cls(
            test=test,
            passed=metadata["passed"],
            actual_hash=metadata["actual_hash"],
            images=metadata["images"],
            expected_hash=metadata["expected_hash"],
        )

    @classmethod
    def recent_results(cls) -> t.Iterator[Self]:
        """Yield the stored results of all test directories, in sorted order.

        Entries of SCREENS_DIR without a `metadata.json` are skipped.
        """
        for testdir in sorted(SCREENS_DIR.iterdir()):
            if (testdir / "metadata.json").exists():
                yield cls.load(testdir)

    @classmethod
    def recent_ui_failures(cls) -> t.Iterator[Self]:
        """Yield only the stored results that failed the UI comparison."""
        yield from (
            result
            for result in cls.recent_results()
            if not result.succeeded_in_ui_comparison()
        )

    def store_recorded(self) -> None:
        """Accept the current screenshots as the new recorded ones.

        The expected hash is set to the actual hash and the recorded directory
        is replaced by a copy of the actual directory.
        """
        self.expected_hash = self.actual_hash
        source_dir = self.test.actual_dir
        target_dir = self.test.recorded_dir
        # remove the previous recording, if any, so that copytree can recreate it
        shutil.rmtree(target_dir, ignore_errors=True)
        shutil.copytree(source_dir, target_dir, symlinks=True)

    def diff_lines(self) -> t.Iterable[tuple[str | None, str | None]]:
        """Return aligned pairs of recorded and actual screen hashes.

        A warning is issued when there are no recorded screens.
        """
        expected_hashes = self.test.recorded_screens[1]
        if not expected_hashes:
            warnings.warn("No recorded screens found, is this a new test?")
        actual_hashes = self.test.actual_screens[1]

        return screens_diff(expected_hashes, actual_hashes)
feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00			`from __future__ import annotations`

			`import hashlib`
			`import json`
			`import re`
			`import shutil`
			`import typing as t`
			`import warnings`
			`from copy import deepcopy`
			`from dataclasses import asdict, dataclass, field`
			`from difflib import SequenceMatcher`
			`from functools import cached_property`
			`from itertools import zip_longest`
			`from pathlib import Path`

			`import pytest`
			`from PIL import Image`
			`from typing_extensions import Self`

			`from trezorlib.debuglink import TrezorClientDebugLink as Client`

			`UI_TESTS_DIR = Path(__file__).resolve().parent`
			`SCREENS_DIR = UI_TESTS_DIR / "screens"`
			`IMAGES_DIR = SCREENS_DIR / "all_images"`
			`FIXTURES_FILE = UI_TESTS_DIR / "fixtures.json"`

			`# fixtures.json are structured as follows:`
			`# {`
			`# "model": {`
			`# "group": {`
			`# "test_name": "hash",`
			`# ...`
			`# }}}...`
			`# IOW, FixturesType = dict[<model>, dict[<group>, dict[<test_name>, <hash>]]]`
			`FixturesType = t.NewType("FixturesType", "dict[str, dict[str, dict[str, str]]]")`

			`FIXTURES: FixturesType = FixturesType({})`


			`def get_fixtures() -> FixturesType:`
			`global FIXTURES`
			`if not FIXTURES and FIXTURES_FILE.exists():`
			`FIXTURES = FixturesType(json.loads(FIXTURES_FILE.read_text()))`

			`return FIXTURES`


			`def prepare_fixtures(`
			`results: t.Iterable[TestResult],`
			`remove_missing: bool = False,`
			`) -> tuple[FixturesType, set[TestCase]]:`
			`"""Prepare contents of fixtures.json"""`
			`# set up brand new contents`
			`grouped_tests: dict[tuple[str, str], dict[str, str]] = {}`
			`for result in results:`
			`idx = result.test.model, result.test.group`
			`group = grouped_tests.setdefault(idx, {})`
			`group[result.test.fixtures_name] = result.actual_hash`

chore(tests): small naming changes and docs [no changelog] 2023-02-03 15:00:15 +00:00			`missing_tests: set[TestCase] = set()`
feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00
			`# merge with previous fixtures`
			`fixtures = deepcopy(get_fixtures())`
			`for (model, group), new_content in grouped_tests.items():`
			`# for every model/group, update the data with the new content`
			`current_content = fixtures.setdefault(model, {}).setdefault(group, {})`
			`if remove_missing:`
			`new_tests = set(new_content.keys())`
			`old_tests = set(current_content.keys())`
			`missing_tests \|= {`
			`TestCase(model, group, test) for test in old_tests - new_tests`
			`}`
			`current_content.clear()`

			`current_content.update(new_content)`

			`return fixtures, missing_tests`


			`def write_fixtures(`
			`results: t.Iterable[TestResult],`
			`remove_missing: bool = False,`
			`dest: Path = FIXTURES_FILE,`
			`) -> None:`
			`global FIXTURES`
			`content, _ = prepare_fixtures(results, remove_missing)`
			`dest.write_text(json.dumps(content, indent=0, sort_keys=True) + "\n")`
fix(tests): display missed testcases in the UI report 2023-03-20 13:57:32 +00:00			`FIXTURES = FixturesType({}) # reset the cache`
feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00

			`def _rename_records(screen_path: Path) -> None:`
			`IMAGES_DIR.mkdir(exist_ok=True)`
			`# rename screenshots`
			`for index, record in enumerate(sorted(screen_path.iterdir())):`
			`record.replace(screen_path / f"{index:08}.png")`


			`def screens_and_hashes(screen_path: Path) -> tuple[list[Path], list[str]]:`
			`if not screen_path.exists():`
			`return [], []`

chore(tests): small naming changes and docs [no changelog] 2023-02-03 15:00:15 +00:00			`paths: list[Path] = []`
			`hashes: list[str] = []`
feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00			`for file in sorted(screen_path.iterdir()):`
			`paths.append(file)`
			`hashes.append(_get_image_hash(file))`
			`return paths, hashes`


			`def _get_image_hash(png_file: Path) -> str:`
			`return hashlib.sha256(_get_bytes_from_png(png_file)).hexdigest()`


			`def _get_bytes_from_png(png_file: Path) -> bytes:`
			`"""Decode a PNG file into bytes representing all the pixels.`

			`Is necessary because Linux and Mac are using different PNG encoding libraries,`
			`and we need the file hashes to be the same on both platforms.`
			`"""`
			`return Image.open(str(png_file)).tobytes()`


			`def _hash_files(path: Path) -> str:`
			`files = path.iterdir()`
			`hasher = hashlib.sha256()`
			`for file in sorted(files):`
			`hasher.update(_get_bytes_from_png(file))`

			`return hasher.digest().hex()`


			`def get_last_call_test_result(request: pytest.FixtureRequest) -> bool \| None:`
			`# if test did not finish, e.g. interrupted by Ctrl+C, the pytest_runtest_makereport`
			`# did not create the attribute we need`
			`if not hasattr(request.node, "rep_call"):`
			`return None`

			`return request.node.rep_call.passed # type: ignore`


			`def _get_test_name_and_group(node_id: str) -> tuple[str, str]:`
			`test_path, func_id = node_id.split("::", maxsplit=1)`
			`assert test_path.endswith(".py")`

			`# tests / device_tests / bitcoin / test_signtx.py`
			`_tests, group_name, *path_in_group = test_path.split("/")`

			`# remove ::TestClass:: if present because it is usually the same as the test file name`
			`func_id = re.sub(r"::.*?::", "-", func_id)`

			`test_path_prefix = "-".join(path_in_group)`
			`new_name = f"{test_path_prefix}::{func_id}"`
			`new_name = new_name.replace("/", "-")`
			`# Test item name is usually function name, but when parametrization is used,`
			`# parameters are also part of the name. Some functions have very long parameter`
			`# names (tx hashes etc) that run out of maximum allowable filename length, so`
			`# we limit the name to first 100 chars. This is not a problem with txhashes.`
			`if len(new_name) <= 100:`
			`return new_name, group_name`

			`differentiator = hashlib.sha256(new_name.encode()).hexdigest()`
			`shortened_name = new_name[:91] + "-" + differentiator[:8]`
			`return shortened_name, group_name`


			`def screens_diff(`
			`expected_hashes: list[str], actual_hashes: list[str]`
			`) -> t.Iterator[tuple[str \| None, str \| None]]:`
			`diff = SequenceMatcher(`
			`None, expected_hashes, actual_hashes, autojunk=False`
			`).get_opcodes()`
chore(tests): small naming changes and docs [no changelog] 2023-02-03 15:00:15 +00:00			`# Example diff result:`
			`# [('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 3), ('equal', 2, 6, 3, 7)]`
			`# For situation when:`
			`# - first screen is the same for both`
			`# - second screen has changes and there is new third screen`
			`# - rest is the same`
feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00			`for _tag, i1, i2, j1, j2 in diff:`
			`# tag is one of "replace", "delete", "equal", "insert"`
			`# i1, i2 and j1, j2 are slice indexes for expected/actual respectively`
			`# The output of get_opcodes is an ordered sequence of instructions`
			`# for converting expected to actual. By taking the subslices and zipping`
			`# together, we get the equal subsequences aligned and Nones at deletion`
			`# or insertion points.`
			`expected_slice = expected_hashes[i1:i2]`
			`actual_slice = actual_hashes[j1:j2]`
			`yield from zip_longest(expected_slice, actual_slice, fillvalue=None)`


			`@dataclass(frozen=True)`
			`class TestCase:`
			`model: str`
			`group: str`
			`name: str`

			`@classmethod`
			`def build(cls, client: Client, request: pytest.FixtureRequest) -> Self:`
			`name, group = _get_test_name_and_group(request.node.nodeid)`
			`return cls(`
			`model=f"T{client.features.model}",`
			`name=name,`
			`group=group,`
			`)`

			`@property`
			`def id(self) -> str:`
			`return f"{self.model}-{self.group}-{self.name}"`

			`@property`
			`def fixtures_name(self) -> str:`
			`return f"{self.model}_{self.name}"`

			`@property`
			`def dir(self) -> Path:`
			`return SCREENS_DIR / self.id`

feat(tests): allow for saving text layout of all screens during UI tests [no changelog] 2023-01-18 10:57:32 +00:00			`@property`
			`def screen_text_file(self) -> Path:`
			`return self.dir / "screens.txt"`

feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00			`@property`
			`def actual_dir(self) -> Path:`
			`return self.dir / "actual"`

			`@cached_property`
			`def actual_screens(self) -> tuple[list[Path], list[str]]:`
			`_rename_records(self.actual_dir)`
			`return screens_and_hashes(self.actual_dir)`

			`@property`
			`def recorded_dir(self) -> Path:`
			`return self.dir / "recorded"`

			`@cached_property`
			`def recorded_screens(self) -> tuple[list[Path], list[str]]:`
			`return screens_and_hashes(self.recorded_dir)`

			`def build_result(self, request: pytest.FixtureRequest) -> TestResult:`
			`_rename_records(self.actual_dir)`
			`result = TestResult(`
			`test=self,`
			`passed=get_last_call_test_result(request),`
			`actual_hash=_hash_files(self.actual_dir),`
			`images=self.actual_screens[1],`
			`)`
			`result.save_metadata()`
			`return result`


			`@dataclass`
			`class TestResult:`
			`test: TestCase`
			`passed: bool \| None`
			`actual_hash: str`
			`images: list[str]`
			`expected_hash: str \| None = field(default=None)`

			`def __post_init__(self) -> None:`
			`if self.expected_hash is None:`
			`self.expected_hash = (`
			`get_fixtures()`
			`.get(self.test.model, {})`
			`.get(self.test.group, {})`
			`.get(self.test.fixtures_name)`
			`)`

			`def save_metadata(self) -> None:`
			`metadata = asdict(self)`
			`(self.test.dir / "metadata.json").write_text(`
			`json.dumps(metadata, indent=2, sort_keys=True) + "\n"`
			`)`

feat(tests): generate UI report with all unique differing screens [no changelog] 2023-01-18 14:08:59 +00:00			`def succeeded_in_ui_comparison(self) -> bool:`
			`return self.actual_hash == self.expected_hash`

feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00			`@classmethod`
			`def load(cls, testdir: Path) -> Self:`
			`metadata = json.loads((testdir / "metadata.json").read_text())`
			`test = TestCase(`
			`model=metadata["test"]["model"],`
			`group=metadata["test"]["group"],`
			`name=metadata["test"]["name"],`
			`)`
			`return cls(`
			`test=test,`
			`passed=metadata["passed"],`
			`actual_hash=metadata["actual_hash"],`
			`expected_hash=metadata["expected_hash"],`
			`images=metadata["images"],`
			`)`

			`@classmethod`
chore(tests): small naming changes and docs [no changelog] 2023-02-03 15:00:15 +00:00			`def recent_results(cls) -> t.Iterator[Self]:`
feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00			`for testdir in sorted(SCREENS_DIR.iterdir()):`
			`meta = testdir / "metadata.json"`
			`if not meta.exists():`
			`continue`
			`yield cls.load(testdir)`

feat(tests): generate UI report with all unique differing screens [no changelog] 2023-01-18 14:08:59 +00:00			`@classmethod`
			`def recent_ui_failures(cls) -> t.Iterator[Self]:`
			`"""Returning just the results that resulted in UI failure."""`
			`for result in cls.recent_results():`
			`if not result.succeeded_in_ui_comparison():`
			`yield result`

feat(tests): revamp UI test harness 2023-01-27 14:13:12 +00:00			`def store_recorded(self) -> None:`
			`self.expected_hash = self.actual_hash`
			`shutil.rmtree(self.test.recorded_dir, ignore_errors=True)`
			`shutil.copytree(`
			`self.test.actual_dir,`
			`self.test.recorded_dir,`
			`symlinks=True,`
			`)`

			`def diff_lines(self) -> t.Iterable[tuple[str \| None, str \| None]]:`
			`_, expected_hashes = self.test.recorded_screens`
			`if not expected_hashes:`
			`warnings.warn("No recorded screens found, is this a new test?")`
			`_, actual_hashes = self.test.actual_screens`

			`return screens_diff(expected_hashes, actual_hashes)`