From e15662e12d8259b2ed7e227cf1aef17eccc48a33 Mon Sep 17 00:00:00 2001 From: matejcik Date: Fri, 27 Jan 2023 15:13:12 +0100 Subject: [PATCH] feat(tests): revamp UI test harness --- ci/prepare_ui_artifacts.py | 30 +- tests/conftest.py | 90 ++--- tests/show_results.py | 27 +- tests/ui_tests/__init__.py | 303 +++++++---------- tests/ui_tests/common.py | 308 ++++++++++++++++++ tests/ui_tests/reporting/__main__.py | 17 + tests/ui_tests/reporting/download.py | 13 +- tests/ui_tests/reporting/html.py | 86 +++-- tests/ui_tests/reporting/master_diff.py | 261 +++++++++++++++ .../ui_tests/reporting/report_master_diff.py | 206 ------------ tests/ui_tests/reporting/testreport.py | 251 +++++++------- tests/ui_tests/{reporting => }/reports/.keep | 0 tests/update_fixtures.py | 15 +- 13 files changed, 932 insertions(+), 675 deletions(-) create mode 100644 tests/ui_tests/common.py create mode 100644 tests/ui_tests/reporting/__main__.py create mode 100644 tests/ui_tests/reporting/master_diff.py delete mode 100644 tests/ui_tests/reporting/report_master_diff.py rename tests/ui_tests/{reporting => }/reports/.keep (100%) diff --git a/ci/prepare_ui_artifacts.py b/ci/prepare_ui_artifacts.py index 0efa556a2..15f253bc2 100644 --- a/ci/prepare_ui_artifacts.py +++ b/ci/prepare_ui_artifacts.py @@ -5,24 +5,24 @@ from pathlib import Path ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT)) # Needed for setup purposes, filling the FILE_HASHES dict -from tests.ui_tests import read_fixtures # isort:skip +from tests.ui_tests.common import TestResult, _hash_files, get_fixtures # isort:skip -read_fixtures() -from tests.ui_tests import _hash_files, FILE_HASHES, SCREENS_DIR # isort:skip -# As in CI we are running T1 and TT tests separately, there will -# always be the other model missing. -# Therefore, choosing just the cases for our model. -if len(sys.argv) > 1 and sys.argv[1].upper() == "T1": - model = "T1" -else: - model = "TT" -model_file_hashes = {k: v for k, v in FILE_HASHES.items() if k.startswith(f"{model}_")} +FIXTURES = get_fixtures() -for test_case, expected_hash in model_file_hashes.items(): - recorded_dir = SCREENS_DIR / test_case / "recorded" - actual_hash = _hash_files(recorded_dir) +for result in TestResult.recent_tests(): + if not result.passed or result.expected_hash != result.actual_hash: + print("WARNING: skipping failed test", result.test.id) + continue + + actual_hash = _hash_files(result.test.actual_dir) + expected_hash = ( + FIXTURES.get(result.test.model, {}) + .get(result.test.group, {}) + .get(result.test.fixtures_name) + ) + assert result.expected_hash == actual_hash assert expected_hash == actual_hash shutil.make_archive( - str(ROOT / "ci/ui_test_records" / actual_hash), "zip", recorded_dir + str(ROOT / "ci/ui_test_records" / actual_hash), "zip", result.test.actual_dir ) diff --git a/tests/conftest.py b/tests/conftest.py index fbea5c795..66d704b43 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,7 +30,6 @@ from trezorlib.transport import enumerate_devices, get_transport from . 
import ui_tests from .device_handler import BackgroundDeviceHandler from .emulators import EmulatorWrapper -from .ui_tests.reporting import testreport if TYPE_CHECKING: from trezorlib._internal.emulator import Emulator @@ -209,13 +208,13 @@ def client( if not setup_params["uninitialized"]: debuglink.load_device( _raw_client, - mnemonic=setup_params["mnemonic"], - pin=setup_params["pin"], + mnemonic=setup_params["mnemonic"], # type: ignore + pin=setup_params["pin"], # type: ignore passphrase_protection=use_passphrase, label="test", language="en-US", - needs_backup=setup_params["needs_backup"], - no_backup=setup_params["no_backup"], + needs_backup=setup_params["needs_backup"], # type: ignore + no_backup=setup_params["no_backup"], # type: ignore ) if request.node.get_closest_marker("experimental"): @@ -243,38 +242,18 @@ def _is_main_runner(session_or_request: pytest.Session | pytest.FixtureRequest) def pytest_sessionstart(session: pytest.Session) -> None: - ui_tests.read_fixtures() - if session.config.getoption("ui") and _is_main_runner(session): - testreport.clear_dir() - # Preparing a new empty file for UI diff - ui_tests.FIXTURES_DIFF.write_bytes(b"") - - -def _should_write_ui_report(exitstatus: pytest.ExitCode) -> bool: - # generate UI report and check missing only if pytest is exitting cleanly - # I.e., the test suite passed or failed (as opposed to ctrl+c break, internal error, - # etc.) - return exitstatus in (pytest.ExitCode.OK, pytest.ExitCode.TESTS_FAILED) + if session.config.getoption("ui"): + ui_tests.setup(main_runner=_is_main_runner(session)) def pytest_sessionfinish(session: pytest.Session, exitstatus: pytest.ExitCode) -> None: - if not _should_write_ui_report(exitstatus): - return - - missing = session.config.getoption("ui_check_missing") test_ui = session.config.getoption("ui") - - if test_ui == "test": - if missing and ui_tests.list_missing(): - session.exitstatus = pytest.ExitCode.TESTS_FAILED - ui_tests.write_fixtures_suggestion(missing) - testreport.generate_reports() - elif test_ui == "record": - if exitstatus == pytest.ExitCode.OK: - ui_tests.write_fixtures(missing) - else: - ui_tests.write_fixtures_suggestion(missing, only_passed_tests=True) - testreport.generate_reports() + if test_ui and _is_main_runner(session): + session.exitstatus = ui_tests.sessionfinish( + exitstatus, + test_ui, # type: ignore + bool(session.config.getoption("ui_check_missing")), + ) def pytest_terminal_summary( @@ -284,40 +263,13 @@ def pytest_terminal_summary( println("") ui_option = config.getoption("ui") - missing_tests = ui_tests.list_missing() - if ui_option and _should_write_ui_report(exitstatus) and missing_tests: - println(f"{len(missing_tests)} expected UI tests did not run.") - if config.getoption("ui_check_missing"): - println("-------- List of missing tests follows: --------") - for test in missing_tests: - println("\t" + test) - - if ui_option == "test": - println("UI test failed.") - elif ui_option == "record": - println("Removing missing tests from record.") - println("") - - if ui_option == "test" and _should_write_ui_report(exitstatus): - println("\n-------- Suggested fixtures.json diff: --------") - print("See", ui_tests.SUGGESTION_FILE) - println("") - - if ui_option == "record" and exitstatus != pytest.ExitCode.OK: - println( - f"\n-------- WARNING! Recording to {ui_tests.HASH_FILE.name} was disabled due to failed tests. 
--------" + if ui_option: + ui_tests.terminal_summary( + terminalreporter.write_line, + ui_option, # type: ignore + bool(config.getoption("ui_check_missing")), + exitstatus, ) - print("See", ui_tests.SUGGESTION_FILE, "for suggestions for ONLY PASSED tests.") - println("") - - if _should_write_ui_report(exitstatus): - println("-------- UI tests summary: --------") - println("Run ./tests/show_results.py to open test summary") - println("") - - println("-------- Accepting all recent UI changes: --------") - println("Run ./tests/update_fixtures.py to apply all changes") - println("") def pytest_addoption(parser: "Parser") -> None: @@ -389,7 +341,7 @@ def pytest_runtest_setup(item: pytest.Item) -> None: @pytest.hookimpl(tryfirst=True, hookwrapper=True) -def pytest_runtest_makereport(item: pytest.Item, call) -> None: +def pytest_runtest_makereport(item: pytest.Item, call) -> Generator: # Make test results available in fixtures. # See https://docs.pytest.org/en/latest/example/simple.html#making-test-result-information-available-in-fixtures # The device_handler fixture uses this as 'request.node.rep_call.passed' attribute, @@ -400,12 +352,12 @@ def pytest_runtest_makereport(item: pytest.Item, call) -> None: @pytest.fixture -def device_handler(client: Client, request: pytest.FixtureRequest) -> None: +def device_handler(client: Client, request: pytest.FixtureRequest) -> Generator: device_handler = BackgroundDeviceHandler(client) yield device_handler # get call test result - test_res = ui_tests.get_last_call_test_result(request) + test_res = ui_tests.common.get_last_call_test_result(request) if test_res is None: return diff --git a/tests/show_results.py b/tests/show_results.py index 95f2b785f..00ec77404 100755 --- a/tests/show_results.py +++ b/tests/show_results.py @@ -13,13 +13,12 @@ from urllib.parse import unquote import click +from ui_tests.common import SCREENS_DIR, TestResult, write_fixtures +from ui_tests.reporting import testreport # noqa: E402 + ROOT = Path(__file__).resolve().parent.parent -UI_TESTS_PATH = ROOT / "tests" / "ui_tests" -TEST_RESULT_PATH = UI_TESTS_PATH / "reporting" / "reports" / "test" -FIXTURES_PATH = ROOT / "tests" / "ui_tests" / "fixtures.json" -sys.path.append(str(UI_TESTS_PATH)) -from reporting import testreport # noqa: E402 +sys.path.append(str(ROOT / "tests")) class NoCacheRequestHandler(http.server.SimpleHTTPRequestHandler): @@ -49,7 +48,7 @@ class NoCacheRequestHandler(http.server.SimpleHTTPRequestHandler): path = posixpath.normpath(path) words = path.split("/") words = filter(None, words) - path = str(TEST_RESULT_PATH) # XXX this is the only modified line + path = str(testreport.TESTREPORT_PATH) # XXX this is the only modified line for word in words: if os.path.dirname(word) or word in (os.curdir, os.pardir): # Ignore components that are not a simple file/directory name @@ -58,15 +57,15 @@ class NoCacheRequestHandler(http.server.SimpleHTTPRequestHandler): if trailing_slash: path += "/" return path - + def do_GET(self) -> None: if self.path in ("/", "/index.html"): testreport.index() - + return super().do_GET() def do_POST(self) -> None: - if self.path == "/fixtures.json" and FIXTURES_PATH.exists(): + if self.path == "/fixtures.json": length = int(self.headers.get("content-length")) field_data = self.rfile.read(length) @@ -76,12 +75,10 @@ class NoCacheRequestHandler(http.server.SimpleHTTPRequestHandler): test_hash = data.get("hash") if test_name is not None and test_hash is not None: - with open(FIXTURES_PATH, "r") as jsonFile: - fixtures = json.load(jsonFile) - 
fixtures[test_name] = test_hash - with open(FIXTURES_PATH, "w") as jsonFile: - json.dump(fixtures, jsonFile, indent=0) - jsonFile.write("\n") + test_path = SCREENS_DIR / test_name + result = TestResult.load(test_path) + assert result.actual_hash == test_hash + write_fixtures([result]) self.send_response(200) self.send_header("Content-Type", "text/plain") diff --git a/tests/ui_tests/__init__.py b/tests/ui_tests/__init__.py index 9bcf543b6..95c043fa4 100644 --- a/tests/ui_tests/__init__.py +++ b/tests/ui_tests/__init__.py @@ -1,125 +1,46 @@ -import hashlib -import json -import re +from __future__ import annotations + import shutil from contextlib import contextmanager -from pathlib import Path -from typing import Dict, Generator, Optional, Set +from typing import Callable, Generator import pytest from _pytest.outcomes import Failed -from PIL import Image from trezorlib.debuglink import TrezorClientDebugLink as Client +from . import common +from .common import SCREENS_DIR, UI_TESTS_DIR, TestCase, TestResult from .reporting import testreport -UI_TESTS_DIR = Path(__file__).resolve().parent -SCREENS_DIR = UI_TESTS_DIR / "screens" -HASH_FILE = UI_TESTS_DIR / "fixtures.json" -SUGGESTION_FILE = UI_TESTS_DIR / "fixtures.suggestion.json" -FIXTURES_DIFF = UI_TESTS_DIR / "fixtures.json.diff" -FILE_HASHES: Dict[str, str] = {} -ACTUAL_HASHES: Dict[str, str] = {} -PROCESSED: Set[str] = set() -FAILED_TESTS: Set[str] = set() - -# T1/TT, to be set in screen_recording(), as we do not know it beforehand -# TODO: it is not the cleanest, we could create a class out of this file -MODEL = "" - - -def get_test_name(node_id: str) -> str: - # Test item name is usually function name, but when parametrization is used, - # parameters are also part of the name. Some functions have very long parameter - # names (tx hashes etc) that run out of maximum allowable filename length, so - # we limit the name to first 100 chars. This is not a problem with txhashes. - new_name = node_id.replace("tests/device_tests/", "") - # remove ::TestClass:: if present because it is usually the same as the test file name - new_name = re.sub(r"::.*?::", "-", new_name) - new_name = new_name.replace("/", "-") # in case there is "/" - if len(new_name) <= 100: - return new_name - return new_name[:91] + "-" + hashlib.sha256(new_name.encode()).hexdigest()[:8] - - -def _process_recorded(screen_path: Path, test_name: str) -> None: - # calculate hash - actual_hash = _hash_files(screen_path) - FILE_HASHES[test_name] = actual_hash - ACTUAL_HASHES[test_name] = actual_hash - _rename_records(screen_path) - testreport.recorded(screen_path, test_name, actual_hash) - - -def _rename_records(screen_path: Path) -> None: - # rename screenshots - for index, record in enumerate(sorted(screen_path.iterdir())): - record.replace(screen_path / f"{index:08}.png") - +FIXTURES_SUGGESTION_FILE = UI_TESTS_DIR / "fixtures.suggestion.json" -def _hash_files(path: Path) -> str: - files = path.iterdir() - hasher = hashlib.sha256() - for file in sorted(files): - hasher.update(_get_bytes_from_png(str(file))) - - return hasher.digest().hex() - - -def _get_bytes_from_png(png_file: str) -> bytes: - """Decode a PNG file into bytes representing all the pixels. - - Is necessary because Linux and Mac are using different PNG encoding libraries, - and we need the file hashes to be the same on both platforms. 
- """ - return Image.open(png_file).tobytes() +def _process_recorded(result: TestResult) -> None: + # calculate hash + result.store_recorded() + testreport.recorded(result) -def _process_tested(fixture_test_path: Path, test_name: str) -> None: - actual_path = fixture_test_path / "actual" - actual_hash = _hash_files(actual_path) - ACTUAL_HASHES[test_name] = actual_hash - - _rename_records(actual_path) - - expected_hash = FILE_HASHES.get(test_name) - if expected_hash is None: - pytest.fail(f"Hash of {test_name} not found in fixtures.json") - if actual_hash != expected_hash: - assert expected_hash is not None - file_path = testreport.failed( - fixture_test_path, test_name, actual_hash, expected_hash +def _process_tested(result: TestResult) -> None: + if result.expected_hash is None: + file_path = testreport.missing(result) + pytest.fail( + f"Hash of {result.test.id} not found in fixtures.json\n" + f"Expected: {result.expected_hash}\n" + f"Actual: {result.actual_hash}\n" + f"Diff file: {file_path}" ) - - # Writing the diff to a file, so that we can process it later - # Appending a new JSON object, not having to regenerate the - # whole file (which could cause issues with multiple processes/threads) - with open(FIXTURES_DIFF, "a") as f: - diff = { - "test_name": test_name, - "actual_hash": actual_hash, - } - f.write(json.dumps(diff) + "\n") - + elif result.actual_hash != result.expected_hash: + file_path = testreport.failed(result) pytest.fail( - f"Hash of {test_name} differs.\n" - f"Expected: {expected_hash}\n" - f"Actual: {actual_hash}\n" + f"Hash of {result.test.id} differs\n" + f"Expected: {result.expected_hash}\n" + f"Actual: {result.actual_hash}\n" f"Diff file: {file_path}" ) else: - testreport.passed(fixture_test_path, test_name, actual_hash) - - -def get_last_call_test_result(request: pytest.FixtureRequest) -> Optional[bool]: - # if test did not finish, e.g. 
interrupted by Ctrl+C, the pytest_runtest_makereport - # did not create the attribute we need - if not hasattr(request.node, "rep_call"): - return None - - return request.node.rep_call.passed + testreport.passed(result) @contextmanager @@ -131,30 +52,15 @@ def screen_recording( yield return - test_name = get_test_name(request.node.nodeid) - - # Differentiating test names between T1 and TT - # Making the model global for other functions - global MODEL - MODEL = f"T{client.features.model}" - - test_name = f"{MODEL}_{test_name}" - - screens_test_path = SCREENS_DIR / test_name - - if test_ui == "record": - screen_path = screens_test_path / "recorded" - else: - screen_path = screens_test_path / "actual" + testcase = TestCase.build(client, request) + testcase.dir.mkdir(exist_ok=True, parents=True) - if not screens_test_path.exists(): - screens_test_path.mkdir() # remove previous files - shutil.rmtree(screen_path, ignore_errors=True) - screen_path.mkdir() + shutil.rmtree(testcase.actual_dir, ignore_errors=True) + testcase.actual_dir.mkdir() try: - client.debug.start_recording(str(screen_path)) + client.debug.start_recording(str(testcase.actual_dir)) yield finally: client.ensure_open() @@ -164,99 +70,116 @@ def screen_recording( client.init_device() client.debug.stop_recording() - PROCESSED.add(test_name) - if get_last_call_test_result(request) is False: - FAILED_TESTS.add(test_name) - + result = testcase.build_result(request) if test_ui == "record": - _process_recorded(screen_path, test_name) + _process_recorded(result) else: - _process_tested(screens_test_path, test_name) + _process_tested(result) -def list_missing() -> Set[str]: - # Only listing the ones for the current model - relevant_cases = { - case for case in FILE_HASHES.keys() if case.startswith(f"{MODEL}_") - } - return relevant_cases - PROCESSED +def setup(main_runner: bool) -> None: + # clear metadata and "actual" recordings before current run, keep "recorded" around + if main_runner: + for meta in SCREENS_DIR.glob("*/metadata.json"): + meta.unlink() + shutil.rmtree(meta.parent / "actual", ignore_errors=True) + # clear testreport + testreport.setup(main_runner) -def read_fixtures() -> None: - if not HASH_FILE.exists(): - raise ValueError("File fixtures.json not found.") - global FILE_HASHES - FILE_HASHES = json.loads(HASH_FILE.read_text()) +def list_missing() -> set[str]: + # Only listing the ones for the current model + _, missing = common.prepare_fixtures(TestResult.recent_tests(), remove_missing=True) + return {test.id for test in missing} -def write_fixtures(remove_missing: bool) -> None: - HASH_FILE.write_text(_get_fixtures_content(FILE_HASHES, remove_missing)) +def update_fixtures(remove_missing: bool = False) -> int: + """Update the fixtures.json file with the actual hashes from the latest run. -def write_fixtures_suggestion( - remove_missing: bool, only_passed_tests: bool = False -) -> None: - SUGGESTION_FILE.write_text( - _get_fixtures_content(ACTUAL_HASHES, remove_missing, only_passed_tests) - ) + Used in --ui=record and in update_fixtures.py + """ + results = list(TestResult.recent_tests()) + for result in results: + result.store_recorded() + common.write_fixtures(results, remove_missing=remove_missing) + return len(results) -def update_fixtures_with_diff() -> int: - """Update the fixtures.json file with the actual hashes from the diff file. 
- Use-case is that the UI test run will generate the differing hashes, - and with this function we can simply update the fixtures.json file - without having to call the UI tests again in recording mode. - """ - if not FIXTURES_DIFF.exists(): - raise ValueError(f"File {FIXTURES_DIFF} not found.") +def _should_write_ui_report(exitstatus: pytest.ExitCode) -> bool: + # generate UI report and check missing only if pytest is exitting cleanly + # I.e., the test suite passed or failed (as opposed to ctrl+c break, internal error, + # etc.) + return exitstatus in (pytest.ExitCode.OK, pytest.ExitCode.TESTS_FAILED) - read_fixtures() - changes_amount = 0 - with open(FIXTURES_DIFF) as f: - for line in f: - changes_amount += 1 - diff = json.loads(line) - FILE_HASHES[diff["test_name"]] = diff["actual_hash"] +def terminal_summary( + println: Callable[[str], None], + ui_option: str, + check_missing: bool, + exitstatus: pytest.ExitCode, +) -> None: + println("") + + normal_exit = _should_write_ui_report(exitstatus) + missing_tests = list_missing() + if ui_option and normal_exit and missing_tests: + println(f"{len(missing_tests)} expected UI tests did not run.") + if check_missing: + println("-------- List of missing tests follows: --------") + for test in missing_tests: + println("\t" + test) + + if ui_option == "test": + println("UI test failed.") + elif ui_option == "record": + println("Removing missing tests from record.") + println("") + + if ui_option == "record" and exitstatus != pytest.ExitCode.OK: + println( + "\n-------- WARNING! Recording to fixtures.json was disabled due to failed tests. --------" + ) + println("") - write_fixtures(remove_missing=False) + if normal_exit: + println("-------- UI tests summary: --------") + println("Run ./tests/show_results.py to open test summary") + println("") - # Returning the amount of updated hashes - return changes_amount + println("-------- Accepting all recent UI changes: --------") + println("Run ./tests/update_fixtures.py to apply all changes") + println("") -def _get_fixtures_content( - fixtures: Dict[str, str], remove_missing: bool, only_passed_tests: bool = False -) -> str: - if remove_missing: - # Not removing the ones for different model - nonrelevant_cases = { - f: h for f, h in FILE_HASHES.items() if not f.startswith(f"{MODEL}_") - } +def sessionfinish( + exitstatus: pytest.ExitCode, test_ui: str, check_missing: bool +) -> pytest.ExitCode: + if not _should_write_ui_report(exitstatus): + return exitstatus - filtered_processed_tests = PROCESSED - if only_passed_tests: - filtered_processed_tests = PROCESSED - FAILED_TESTS + testreport.generate_reports() + if test_ui == "test" and check_missing and list_missing(): + common.write_fixtures( + TestResult.recent_tests(), + remove_missing=True, + dest=FIXTURES_SUGGESTION_FILE, + ) + return pytest.ExitCode.TESTS_FAILED - processed_fixtures = {i: fixtures[i] for i in filtered_processed_tests} - fixtures = {**nonrelevant_cases, **processed_fixtures} - else: - fixtures = fixtures + if test_ui == "record" and exitstatus == pytest.ExitCode.OK: + update_fixtures(check_missing) - return json.dumps(fixtures, indent="", sort_keys=True) + "\n" + return exitstatus def main() -> None: - read_fixtures() - for record in SCREENS_DIR.iterdir(): - if not (record / "actual").exists(): - continue - + for result in TestResult.recent_tests(): try: - _process_tested(record, record.name) - print("PASSED:", record.name) + _process_tested(result) + print("PASSED:", result.test.id) except Failed: - print("FAILED:", record.name) + 
print("FAILED:", result.test.id) testreport.generate_reports() diff --git a/tests/ui_tests/common.py b/tests/ui_tests/common.py new file mode 100644 index 000000000..df85bb06f --- /dev/null +++ b/tests/ui_tests/common.py @@ -0,0 +1,308 @@ +from __future__ import annotations + +import hashlib +import json +import re +import shutil +import typing as t +import warnings +from copy import deepcopy +from dataclasses import asdict, dataclass, field +from difflib import SequenceMatcher +from functools import cached_property +from itertools import zip_longest +from pathlib import Path + +import pytest +from PIL import Image +from typing_extensions import Self + +from trezorlib.debuglink import TrezorClientDebugLink as Client + +UI_TESTS_DIR = Path(__file__).resolve().parent +SCREENS_DIR = UI_TESTS_DIR / "screens" +IMAGES_DIR = SCREENS_DIR / "all_images" +FIXTURES_FILE = UI_TESTS_DIR / "fixtures.json" + +# fixtures.json are structured as follows: +# { +# "model": { +# "group": { +# "test_name": "hash", +# ... +# }}}... +# IOW, FixturesType = dict[, dict[, dict[, ]]] +FixturesType = t.NewType("FixturesType", "dict[str, dict[str, dict[str, str]]]") + +FIXTURES: FixturesType = FixturesType({}) + + +def get_fixtures() -> FixturesType: + global FIXTURES + if not FIXTURES and FIXTURES_FILE.exists(): + FIXTURES = FixturesType(json.loads(FIXTURES_FILE.read_text())) + + return FIXTURES + + +def prepare_fixtures( + results: t.Iterable[TestResult], + remove_missing: bool = False, +) -> tuple[FixturesType, set[TestCase]]: + """Prepare contents of fixtures.json""" + # set up brand new contents + grouped_tests: dict[tuple[str, str], dict[str, str]] = {} + for result in results: + idx = result.test.model, result.test.group + group = grouped_tests.setdefault(idx, {}) + group[result.test.fixtures_name] = result.actual_hash + + missing_tests = set() + + # merge with previous fixtures + fixtures = deepcopy(get_fixtures()) + for (model, group), new_content in grouped_tests.items(): + # for every model/group, update the data with the new content + current_content = fixtures.setdefault(model, {}).setdefault(group, {}) + if remove_missing: + new_tests = set(new_content.keys()) + old_tests = set(current_content.keys()) + missing_tests |= { + TestCase(model, group, test) for test in old_tests - new_tests + } + current_content.clear() + + current_content.update(new_content) + + return fixtures, missing_tests + + +def write_fixtures( + results: t.Iterable[TestResult], + remove_missing: bool = False, + dest: Path = FIXTURES_FILE, +) -> None: + global FIXTURES + content, _ = prepare_fixtures(results, remove_missing) + FIXTURES = FixturesType(content) + dest.write_text(json.dumps(content, indent=0, sort_keys=True) + "\n") + + +def _rename_records(screen_path: Path) -> None: + IMAGES_DIR.mkdir(exist_ok=True) + # rename screenshots + for index, record in enumerate(sorted(screen_path.iterdir())): + record.replace(screen_path / f"{index:08}.png") + + +def screens_and_hashes(screen_path: Path) -> tuple[list[Path], list[str]]: + if not screen_path.exists(): + return [], [] + + hashes = [] + paths = [] + for file in sorted(screen_path.iterdir()): + paths.append(file) + if len(file.stem) == 32: + try: + hashes.append(bytes.fromhex(file.stem)) + continue + except ValueError: + pass + hashes.append(_get_image_hash(file)) + return paths, hashes + + +def _get_image_hash(png_file: Path) -> str: + return hashlib.sha256(_get_bytes_from_png(png_file)).hexdigest() + + +def _get_bytes_from_png(png_file: Path) -> bytes: + """Decode a PNG file into 
bytes representing all the pixels. + + Is necessary because Linux and Mac are using different PNG encoding libraries, + and we need the file hashes to be the same on both platforms. + """ + return Image.open(str(png_file)).tobytes() + + +def _hash_files(path: Path) -> str: + files = path.iterdir() + hasher = hashlib.sha256() + for file in sorted(files): + hasher.update(_get_bytes_from_png(file)) + + return hasher.digest().hex() + + +def get_last_call_test_result(request: pytest.FixtureRequest) -> bool | None: + # if test did not finish, e.g. interrupted by Ctrl+C, the pytest_runtest_makereport + # did not create the attribute we need + if not hasattr(request.node, "rep_call"): + return None + + return request.node.rep_call.passed # type: ignore + + +def _get_test_name_and_group(node_id: str) -> tuple[str, str]: + test_path, func_id = node_id.split("::", maxsplit=1) + assert test_path.endswith(".py") + + # tests / device_tests / bitcoin / test_signtx.py + _tests, group_name, *path_in_group = test_path.split("/") + + # remove ::TestClass:: if present because it is usually the same as the test file name + func_id = re.sub(r"::.*?::", "-", func_id) + + test_path_prefix = "-".join(path_in_group) + new_name = f"{test_path_prefix}::{func_id}" + new_name = new_name.replace("/", "-") + # Test item name is usually function name, but when parametrization is used, + # parameters are also part of the name. Some functions have very long parameter + # names (tx hashes etc) that run out of maximum allowable filename length, so + # we limit the name to first 100 chars. This is not a problem with txhashes. + if len(new_name) <= 100: + return new_name, group_name + + differentiator = hashlib.sha256(new_name.encode()).hexdigest() + shortened_name = new_name[:91] + "-" + differentiator[:8] + return shortened_name, group_name + + +def screens_diff( + expected_hashes: list[str], actual_hashes: list[str] +) -> t.Iterator[tuple[str | None, str | None]]: + diff = SequenceMatcher( + None, expected_hashes, actual_hashes, autojunk=False + ).get_opcodes() + for _tag, i1, i2, j1, j2 in diff: + # tag is one of "replace", "delete", "equal", "insert" + # i1, i2 and j1, j2 are slice indexes for expected/actual respectively + # The output of get_opcodes is an ordered sequence of instructions + # for converting expected to actual. By taking the subslices and zipping + # together, we get the equal subsequences aligned and Nones at deletion + # or insertion points. 
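(Illustrative aside, not from the patch.) The comment above describes how the get_opcodes() output is turned into aligned screen pairs; a minimal standalone sketch of that behaviour, run on made-up hash lists, would look like this:

    from difflib import SequenceMatcher
    from itertools import zip_longest

    expected = ["aaa", "bbb", "ccc", "ddd"]
    actual = ["aaa", "xxx", "ccc", "ddd", "eee"]

    pairs = []
    for _tag, i1, i2, j1, j2 in SequenceMatcher(
        None, expected, actual, autojunk=False
    ).get_opcodes():
        # equal runs stay paired; unequal-length runs are padded with None
        pairs.extend(zip_longest(expected[i1:i2], actual[j1:j2], fillvalue=None))

    assert pairs == [
        ("aaa", "aaa"), ("bbb", "xxx"), ("ccc", "ccc"), ("ddd", "ddd"), (None, "eee"),
    ]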
+ expected_slice = expected_hashes[i1:i2] + actual_slice = actual_hashes[j1:j2] + yield from zip_longest(expected_slice, actual_slice, fillvalue=None) + + +@dataclass(frozen=True) +class TestCase: + model: str + group: str + name: str + + @classmethod + def build(cls, client: Client, request: pytest.FixtureRequest) -> Self: + name, group = _get_test_name_and_group(request.node.nodeid) + return cls( + model=f"T{client.features.model}", + name=name, + group=group, + ) + + @property + def id(self) -> str: + return f"{self.model}-{self.group}-{self.name}" + + @property + def fixtures_name(self) -> str: + return f"{self.model}_{self.name}" + + @property + def dir(self) -> Path: + return SCREENS_DIR / self.id + + @property + def actual_dir(self) -> Path: + return self.dir / "actual" + + @cached_property + def actual_screens(self) -> tuple[list[Path], list[str]]: + _rename_records(self.actual_dir) + return screens_and_hashes(self.actual_dir) + + @property + def recorded_dir(self) -> Path: + return self.dir / "recorded" + + @cached_property + def recorded_screens(self) -> tuple[list[Path], list[str]]: + return screens_and_hashes(self.recorded_dir) + + def build_result(self, request: pytest.FixtureRequest) -> TestResult: + _rename_records(self.actual_dir) + result = TestResult( + test=self, + passed=get_last_call_test_result(request), + actual_hash=_hash_files(self.actual_dir), + images=self.actual_screens[1], + ) + result.save_metadata() + return result + + +@dataclass +class TestResult: + test: TestCase + passed: bool | None + actual_hash: str + images: list[str] + expected_hash: str | None = field(default=None) + + def __post_init__(self) -> None: + if self.expected_hash is None: + self.expected_hash = ( + get_fixtures() + .get(self.test.model, {}) + .get(self.test.group, {}) + .get(self.test.fixtures_name) + ) + + def save_metadata(self) -> None: + metadata = asdict(self) + (self.test.dir / "metadata.json").write_text( + json.dumps(metadata, indent=2, sort_keys=True) + "\n" + ) + + @classmethod + def load(cls, testdir: Path) -> Self: + metadata = json.loads((testdir / "metadata.json").read_text()) + test = TestCase( + model=metadata["test"]["model"], + group=metadata["test"]["group"], + name=metadata["test"]["name"], + ) + return cls( + test=test, + passed=metadata["passed"], + actual_hash=metadata["actual_hash"], + expected_hash=metadata["expected_hash"], + images=metadata["images"], + ) + + @classmethod + def recent_tests(cls) -> t.Iterator[Self]: + for testdir in sorted(SCREENS_DIR.iterdir()): + meta = testdir / "metadata.json" + if not meta.exists(): + continue + yield cls.load(testdir) + + def store_recorded(self) -> None: + self.expected_hash = self.actual_hash + shutil.rmtree(self.test.recorded_dir, ignore_errors=True) + shutil.copytree( + self.test.actual_dir, + self.test.recorded_dir, + symlinks=True, + ) + + def diff_lines(self) -> t.Iterable[tuple[str | None, str | None]]: + _, expected_hashes = self.test.recorded_screens + if not expected_hashes: + warnings.warn("No recorded screens found, is this a new test?") + _, actual_hashes = self.test.actual_screens + + return screens_diff(expected_hashes, actual_hashes) diff --git a/tests/ui_tests/reporting/__main__.py b/tests/ui_tests/reporting/__main__.py new file mode 100644 index 000000000..f9aac4bfd --- /dev/null +++ b/tests/ui_tests/reporting/__main__.py @@ -0,0 +1,17 @@ +import click + +from . 
import master_diff + + +@click.group() +def cli(): + pass + + +@cli.command(name="master-diff") +def do_master_diff(): + master_diff.main() + + +if __name__ == "__main__": + cli() diff --git a/tests/ui_tests/reporting/download.py b/tests/ui_tests/reporting/download.py index 73484d135..cb160a72d 100644 --- a/tests/ui_tests/reporting/download.py +++ b/tests/ui_tests/reporting/download.py @@ -1,9 +1,10 @@ -import json +from __future__ import annotations + import urllib.error import urllib.request import zipfile from pathlib import Path -from typing import Dict +from typing import Any import requests @@ -17,6 +18,7 @@ def fetch_recorded(hash: str, path: Path) -> None: zip_dest = path / "recorded.zip" try: + print("downloading", zip_src) urllib.request.urlretrieve(zip_src, zip_dest) except urllib.error.HTTPError: raise RuntimeError(f"No such recorded collection was found on '{zip_src}'.") @@ -27,12 +29,7 @@ def fetch_recorded(hash: str, path: Path) -> None: zip_dest.unlink() -def fetch_fixtures_master() -> Dict[str, str]: +def fetch_fixtures_master() -> dict[str, Any]: r = requests.get(FIXTURES_MASTER) r.raise_for_status() return r.json() - - -def fetch_fixtures_current() -> Dict[str, str]: - with open(FIXTURES_CURRENT) as f: - return json.loads(f.read()) diff --git a/tests/ui_tests/reporting/html.py b/tests/ui_tests/reporting/html.py index 7f21d9e3d..793bd0fb9 100644 --- a/tests/ui_tests/reporting/html.py +++ b/tests/ui_tests/reporting/html.py @@ -1,15 +1,30 @@ -import base64 -import filecmp -from itertools import zip_longest +from __future__ import annotations + +import shutil from pathlib import Path -from typing import Dict, List, Optional +from typing import Iterable from dominate import document from dominate.tags import a, i, img, table, td, th, tr +from PIL import Image + +from ..common import UI_TESTS_DIR + +_IMAGE_DIR = UI_TESTS_DIR / "images" + + +def set_image_dir(path: Path) -> None: + global _IMAGE_DIR + _IMAGE_DIR = path + + +def store_images(screens: Iterable[Path], hashes: Iterable[str]) -> None: + for screen, hash in zip(screens, hashes): + shutil.copy(screen, _IMAGE_DIR / f"{hash}.png") def report_links( - tests: List[Path], reports_path: Path, actual_hashes: Dict[str, str] = None + tests: list[Path], reports_path: Path, actual_hashes: dict[str, str] | None = None ) -> None: if actual_hashes is None: actual_hashes = {} @@ -31,42 +46,53 @@ def write(fixture_test_path: Path, doc: document, filename: str) -> Path: return fixture_test_path / filename -def image_column(src: Path, image_width: Optional[int] = None) -> None: +def image_column(hash: str | None, cur_dir: Path) -> None: """Put image into table as one cell.""" with td(): - if src: - image_raw(src, image_width) + if hash: + image_link(hash, cur_dir) else: i("missing") -def image_raw(src: Path, image_width: Optional[int] = None) -> None: - """Display image on the screen""" - # open image file - image = src.read_bytes() - # encode image as base64 - image = base64.b64encode(image) - # convert output to str - image = image.decode() - # img(src=src.relative_to(fixture_test_path)) +def _relative_path(cur_dir: Path, path_to: Path) -> str: + """Find best relative path to refer to path_to from cur_dir.""" + cur_dir = cur_dir.resolve() + path_to = path_to.resolve() + if not cur_dir.is_dir(): + cur_dir = cur_dir.parent + + common = cur_dir + while common not in path_to.parents: + common = common.parent + ascent = len(cur_dir.parts) - len(common.parts) + relpath = path_to.relative_to(common) + components = [".."] * ascent + 
list(relpath.parts) + return "/".join(components) + + +def image_link(hash: str, cur_dir: Path, title: str = "") -> None: + """Put image into table as one cell.""" + path = _IMAGE_DIR / f"{hash}.png" + im = Image.open(path) + width = im.width + if width < 240: + width *= 2 + img( - src="data:image/png;base64, " + image, - style=f"width: {image_width}px; image-rendering: pixelated;" - if image_width - else "", + src=_relative_path(cur_dir, path), + style=f"width: {width}px; image-rendering: pixelated;", + title=title, + loading="lazy", ) -def diff_table( - left_screens: List[Path], - right_screens: List[Path], - image_width: Optional[int] = None, -) -> None: - for left, right in zip_longest(left_screens, right_screens): - if left and right and filecmp.cmp(right, left): +def diff_table(diff: Iterable[tuple[str | None, str | None]], cur_dir: Path) -> None: + for left, right in diff: + if left == right: background = "white" else: background = "red" with tr(bgcolor=background): - image_column(left, image_width) - image_column(right, image_width) + image_column(left, cur_dir) + image_column(right, cur_dir) diff --git a/tests/ui_tests/reporting/master_diff.py b/tests/ui_tests/reporting/master_diff.py new file mode 100644 index 000000000..0c706d37d --- /dev/null +++ b/tests/ui_tests/reporting/master_diff.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +import shutil +import tempfile +from contextlib import contextmanager +from pathlib import Path +from typing import Any + +import dominate +from dominate.tags import br, h1, h2, hr, i, p, table, td, th, tr + +from ..common import ( + SCREENS_DIR, + FixturesType, + get_fixtures, + screens_and_hashes, + screens_diff, +) +from . import download, html +from .testreport import REPORTS_PATH + +MASTERDIFF_PATH = REPORTS_PATH / "master_diff" +IMAGES_PATH = MASTERDIFF_PATH / "images" + + +def _preprocess_master_compat(master_fixtures: dict[str, Any]) -> FixturesType: + if all(isinstance(v, str) for v in master_fixtures.values()): + # old format, convert to new format + new_fixtures = {} + for key, val in master_fixtures.items(): + model, _test = key.split("_", maxsplit=1) + groups_by_model = new_fixtures.setdefault(model, {}) + default_group = groups_by_model.setdefault("device_tests", {}) + default_group[key] = val + return FixturesType(new_fixtures) + else: + return FixturesType(master_fixtures) + + +def get_diff() -> tuple[dict[str, str], dict[str, str], dict[str, tuple[str, str]]]: + master = _preprocess_master_compat(download.fetch_fixtures_master()) + current = get_fixtures() + + removed = {} + added = {} + diff = {} + + for model in master.keys() | current.keys(): + master_groups = master.get(model, {}) + current_groups = current.get(model, {}) + for group in master_groups.keys() | current_groups.keys(): + master_tests = master_groups.get(group, {}) + current_tests = current_groups.get(group, {}) + + print(f"checking model {model}, group {group}...") + + def testname(test: str) -> str: + assert test.startswith(model + "_") + test = test[len(model) + 1 :] + return f"{model}-{group}-{test}" + + # removed items + removed_here = { + testname(test): master_tests[test] + for test in (master_tests.keys() - current_tests.keys()) + } + # added items + added_here = { + testname(test): current_tests[test] + for test in (current_tests.keys() - master_tests.keys()) + } + # items in both branches + same = master_tests.items() - removed_here.items() - added_here.items() + # create the diff + diff_here = {} + for master_test, master_hash in same: + if 
current_tests.get(master_test) == master_hash: + continue + diff_here[testname(master_test)] = ( + master_tests[master_test], + current_tests[master_test], + ) + + removed.update(removed_here) + added.update(added_here) + diff.update(diff_here) + print(f" removed: {len(removed_here)}") + print(f" added: {len(added_here)}") + print(f" diff: {len(diff_here)}") + + return removed, added, diff + + +def removed(screens_path: Path, test_name: str) -> Path: + doc = dominate.document(title=test_name) + screens, hashes = screens_and_hashes(screens_path) + html.store_images(screens, hashes) + + with doc: + h1(test_name) + p( + "This UI test has been removed from fixtures.json.", + style="color: red; font-weight: bold;", + ) + hr() + + with table(border=1): + with tr(): + th("Removed files") + + for hash in hashes: + with tr(): + html.image_column(hash, MASTERDIFF_PATH / "removed") + + return html.write(MASTERDIFF_PATH / "removed", doc, test_name + ".html") + + +def added(screens_path: Path, test_name: str) -> Path: + doc = dominate.document(title=test_name) + screens, hashes = screens_and_hashes(screens_path) + html.store_images(screens, hashes) + + with doc: + h1(test_name) + p( + "This UI test has been added to fixtures.json.", + style="color: green; font-weight: bold;", + ) + hr() + + with table(border=1): + with tr(): + th("Added files") + + for hash in hashes: + with tr(): + html.image_column(hash, MASTERDIFF_PATH / "added") + + return html.write(MASTERDIFF_PATH / "added", doc, test_name + ".html") + + +def diff( + master_screens_path: Path, + current_screens_path: Path, + test_name: str, + master_hash: str, + current_hash: str, +) -> Path: + doc = dominate.document(title=test_name) + master_screens, master_hashes = screens_and_hashes(master_screens_path) + current_screens, current_hashes = screens_and_hashes(current_screens_path) + html.store_images(master_screens, master_hashes) + html.store_images(current_screens, current_hashes) + + with doc: + h1(test_name) + p("This UI test differs from master.", style="color: grey; font-weight: bold;") + with table(): + with tr(): + td("Master:") + td(master_hash, style="color: red;") + with tr(): + td("Current:") + td(current_hash, style="color: green;") + hr() + + with table(border=1, width=600): + with tr(): + th("Master") + th("Current branch") + + html.diff_table( + screens_diff(master_hashes, current_hashes), MASTERDIFF_PATH / "diff" + ) + + return html.write(MASTERDIFF_PATH / "diff", doc, test_name + ".html") + + +def index() -> Path: + removed = list((MASTERDIFF_PATH / "removed").iterdir()) + added = list((MASTERDIFF_PATH / "added").iterdir()) + diff = list((MASTERDIFF_PATH / "diff").iterdir()) + + title = "UI changes from master" + doc = dominate.document(title=title) + + with doc: + h1("UI changes from master") + hr() + + h2("Removed:", style="color: red;") + i("UI fixtures that have been removed:") + html.report_links(removed, MASTERDIFF_PATH) + br() + hr() + + h2("Added:", style="color: green;") + i("UI fixtures that have been added:") + html.report_links(added, MASTERDIFF_PATH) + br() + hr() + + h2("Differs:", style="color: grey;") + i("UI fixtures that have been modified:") + html.report_links(diff, MASTERDIFF_PATH) + + return html.write(MASTERDIFF_PATH, doc, "index.html") + + +def create_dirs() -> None: + # delete the reports dir to clear previous entries and create folders + shutil.rmtree(MASTERDIFF_PATH, ignore_errors=True) + MASTERDIFF_PATH.mkdir(parents=True) + (MASTERDIFF_PATH / "removed").mkdir() + (MASTERDIFF_PATH / "added").mkdir() 
+ (MASTERDIFF_PATH / "diff").mkdir() + IMAGES_PATH.mkdir(exist_ok=True) + + +def create_reports() -> None: + removed_tests, added_tests, diff_tests = get_diff() + + @contextmanager + def tmpdir(): + with tempfile.TemporaryDirectory(prefix="trezor-records-") as temp_dir: + yield Path(temp_dir) + + for test_name, test_hash in removed_tests.items(): + with tmpdir() as temp_dir: + download.fetch_recorded(test_hash, temp_dir) + removed(temp_dir, test_name) + + for test_name, test_hash in added_tests.items(): + path = SCREENS_DIR / test_name / "actual" + if not path.exists(): + raise RuntimeError("Folder does not exist, has it been recorded?", path) + added(path, test_name) + + for test_name, (master_hash, current_hash) in diff_tests.items(): + with tmpdir() as master_screens: + download.fetch_recorded(master_hash, master_screens) + + current_screens = SCREENS_DIR / test_name / "actual" + if not current_screens.exists(): + raise RuntimeError( + "Folder does not exist, did the test run?", current_screens + ) + diff( + master_screens, + current_screens, + test_name, + master_hash, + current_hash, + ) + + +def main() -> None: + create_dirs() + html.set_image_dir(IMAGES_PATH) + create_reports() + index() diff --git a/tests/ui_tests/reporting/report_master_diff.py b/tests/ui_tests/reporting/report_master_diff.py deleted file mode 100644 index 314a8d287..000000000 --- a/tests/ui_tests/reporting/report_master_diff.py +++ /dev/null @@ -1,206 +0,0 @@ -import shutil -import sys -import tempfile -from contextlib import contextmanager -from pathlib import Path -from typing import Dict, Sequence, Tuple - -import dominate -from dominate.tags import br, h1, h2, hr, i, p, table, td, th, tr - -# These are imported directly because this script is run directly, isort gets confused by that. 
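(Illustrative aside, not from the patch.) The new ui_tests.reporting package entry point replaces the standalone reporting/report_master_diff.py script removed by this patch. A hedged sketch of the intended invocation, assuming it is run from the tests/ directory with the project's Python dependencies installed:

    # shell:  python -m ui_tests.reporting master-diff
    # which dispatches through the click group to roughly:
    from ui_tests.reporting import master_diff

    # create_dirs() -> html.set_image_dir(IMAGES_PATH) -> create_reports() -> index()
    master_diff.main()

Note that get_diff() may receive the master fixtures in the old flat layout ({"TT_<test>": "<hash>"}); _preprocess_master_compat() nests those under the model and a default "device_tests" group so they can be compared against the new per-model, per-group layout.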
-import download # isort:skip -import html # isort:skip - -REPORTS_PATH = Path(__file__).resolve().parent / "reports" / "master_diff" -RECORDED_SCREENS_PATH = Path(__file__).resolve().parent.parent / "screens" - - -def get_diff( - test_prefixes: Sequence[str], -) -> Tuple[Dict[str, str], Dict[str, str], Dict[str, str]]: - master = download.fetch_fixtures_master() - current = download.fetch_fixtures_current() - - def matches_prefix(name: str) -> bool: - return any(name.startswith(prefix) for prefix in test_prefixes) - - master = {name: value for name, value in master.items() if matches_prefix(name)} - current = {name: value for name, value in current.items() if matches_prefix(name)} - - # removed items - removed = {test: master[test] for test in (master.keys() - current.keys())} - # added items - added = {test: current[test] for test in (current.keys() - master.keys())} - # items in both branches - same = master.items() - removed.items() - added.items() - # create the diff - diff = dict() - for master_test, master_hash in same: - if current.get(master_test) == master_hash: - continue - diff[master_test] = master[master_test], current[master_test] - - return removed, added, diff - - -def removed(screens_path: Path, test_name: str) -> Path: - doc = dominate.document(title=test_name) - screens = sorted(screens_path.iterdir()) - - with doc: - h1(test_name) - p( - "This UI test has been removed from fixtures.json.", - style="color: red; font-weight: bold;", - ) - hr() - - with table(border=1): - with tr(): - th("Removed files") - - for screen in screens: - with tr(): - html.image_column(screen) - - return html.write(REPORTS_PATH / "removed", doc, test_name + ".html") - - -def added(screens_path: Path, test_name: str) -> Path: - doc = dominate.document(title=test_name) - screens = sorted(screens_path.iterdir()) - - with doc: - h1(test_name) - p( - "This UI test has been added to fixtures.json.", - style="color: green; font-weight: bold;", - ) - hr() - - with table(border=1): - with tr(): - th("Added files") - - for screen in screens: - with tr(): - html.image_column(screen) - - return html.write(REPORTS_PATH / "added", doc, test_name + ".html") - - -def diff( - master_screens_path: Path, - current_screens_path: Path, - test_name: str, - master_hash: str, - current_hash: str, -) -> Path: - doc = dominate.document(title=test_name) - master_screens = sorted(master_screens_path.iterdir()) - current_screens = sorted(current_screens_path.iterdir()) - - with doc: - h1(test_name) - p("This UI test differs from master.", style="color: grey; font-weight: bold;") - with table(): - with tr(): - td("Master:") - td(master_hash, style="color: red;") - with tr(): - td("Current:") - td(current_hash, style="color: green;") - hr() - - with table(border=1, width=600): - with tr(): - th("Master") - th("Current branch") - - html.diff_table(master_screens, current_screens) - - return html.write(REPORTS_PATH / "diff", doc, test_name + ".html") - - -def index() -> Path: - removed = list((REPORTS_PATH / "removed").iterdir()) - added = list((REPORTS_PATH / "added").iterdir()) - diff = list((REPORTS_PATH / "diff").iterdir()) - - title = "UI changes from master" - doc = dominate.document(title=title) - - with doc: - h1("UI changes from master") - hr() - - h2("Removed:", style="color: red;") - i("UI fixtures that have been removed:") - html.report_links(removed, REPORTS_PATH) - br() - hr() - - h2("Added:", style="color: green;") - i("UI fixtures that have been added:") - html.report_links(added, REPORTS_PATH) - br() - hr() - 
- h2("Differs:", style="color: grey;") - i("UI fixtures that have been modified:") - html.report_links(diff, REPORTS_PATH) - - return html.write(REPORTS_PATH, doc, "index.html") - - -def create_dirs() -> None: - # delete the reports dir to clear previous entries and create folders - shutil.rmtree(REPORTS_PATH, ignore_errors=True) - REPORTS_PATH.mkdir() - (REPORTS_PATH / "removed").mkdir() - (REPORTS_PATH / "added").mkdir() - (REPORTS_PATH / "diff").mkdir() - - -def create_reports(test_prefixes: Sequence[str]) -> None: - removed_tests, added_tests, diff_tests = get_diff(test_prefixes) - - @contextmanager - def tmpdir(): - with tempfile.TemporaryDirectory(prefix="trezor-records-") as temp_dir: - yield Path(temp_dir) - - for test_name, test_hash in removed_tests.items(): - with tmpdir() as temp_dir: - download.fetch_recorded(test_hash, temp_dir) - removed(temp_dir, test_name) - - for test_name, test_hash in added_tests.items(): - path = RECORDED_SCREENS_PATH / test_name / "actual" - if not path.exists(): - raise RuntimeError("Folder does not exist, has it been recorded?", path) - added(path, test_name) - - for test_name, (master_hash, current_hash) in diff_tests.items(): - with tmpdir() as master_screens: - download.fetch_recorded(master_hash, master_screens) - - current_screens = RECORDED_SCREENS_PATH / test_name / "actual" - if not current_screens.exists(): - raise RuntimeError( - "Folder does not exist, has it been recorded?", current_screens - ) - diff( - master_screens, - current_screens, - test_name, - master_hash, - current_hash, - ) - - -if __name__ == "__main__": - create_dirs() - create_reports(sys.argv[1:] or [""]) - index() diff --git a/tests/ui_tests/reporting/testreport.py b/tests/ui_tests/reporting/testreport.py index 4c496a011..932bc718b 100644 --- a/tests/ui_tests/reporting/testreport.py +++ b/tests/ui_tests/reporting/testreport.py @@ -1,47 +1,33 @@ -import hashlib +from __future__ import annotations + import shutil from collections import defaultdict from datetime import datetime -from distutils.dir_util import copy_tree from pathlib import Path -from typing import Dict, List, Set import dominate import dominate.tags as t from dominate.tags import a, div, h1, h2, hr, p, span, strong, table, th, tr from dominate.util import text +from ..common import UI_TESTS_DIR, TestCase, TestResult from . import download, html HERE = Path(__file__).resolve().parent -REPORTS_PATH = HERE / "reports" / "test" -RECORDED_SCREENS_PATH = Path(__file__).resolve().parent.parent / "screens" +REPORTS_PATH = UI_TESTS_DIR / "reports" +TESTREPORT_PATH = REPORTS_PATH / "test" +IMAGES_PATH = TESTREPORT_PATH / "images" STYLE = (HERE / "testreport.css").read_text() SCRIPT = (HERE / "testreport.js").read_text() -SCREENSHOTS_WIDTH_PX_TO_DISPLAY = { - "T1": 128 * 2, # original is 128px - "TT": 240, # original is 240px - "TR": 128 * 2, # original is 128px -} # These two html files are referencing each other ALL_SCREENS = "all_screens.html" ALL_UNIQUE_SCREENS = "all_unique_screens.html" -ACTUAL_HASHES: Dict[str, str] = {} - - -def _image_width(test_name: str) -> int: - """Return the width of the image to display for the given test name. - - Is model-specific. Model is at the beginning of each test-case. 
- """ - return SCREENSHOTS_WIDTH_PX_TO_DISPLAY[test_name[:2]] - def document( - title: str, actual_hash: str = None, index: bool = False + title: str, actual_hash: str | None = None, index: bool = False ) -> dominate.document: doc = dominate.document(title=title) style = t.style() @@ -59,7 +45,7 @@ def document( return doc -def _header(test_name: str, expected_hash: str, actual_hash: str) -> None: +def _header(test_name: str, expected_hash: str | None, actual_hash: str) -> None: h1(test_name) with div(): if actual_hash == expected_hash: @@ -67,28 +53,43 @@ def _header(test_name: str, expected_hash: str, actual_hash: str) -> None: "This test succeeded on UI comparison.", style="color: green; font-weight: bold;", ) + elif expected_hash is None: + p( + "This test is new and has no expected hash.", + style="color: blue; font-weight: bold;", + ) else: p( "This test failed on UI comparison.", style="color: red; font-weight: bold;", ) - p("Expected: ", expected_hash) + p("Expected: ", expected_hash or "(new test case)") p("Actual: ", actual_hash) hr() -def clear_dir() -> None: +def setup(main_runner: bool) -> None: """Delete and create the reports dir to clear previous entries.""" - shutil.rmtree(REPORTS_PATH, ignore_errors=True) - REPORTS_PATH.mkdir() - (REPORTS_PATH / "failed").mkdir() - (REPORTS_PATH / "passed").mkdir() + if main_runner: + shutil.rmtree(TESTREPORT_PATH, ignore_errors=True) + TESTREPORT_PATH.mkdir() + (TESTREPORT_PATH / "failed").mkdir() + (TESTREPORT_PATH / "passed").mkdir() + (TESTREPORT_PATH / "new").mkdir() + IMAGES_PATH.mkdir(parents=True) + + html.set_image_dir(IMAGES_PATH) def index() -> Path: """Generate index.html with all the test results - lists of failed and passed tests.""" - passed_tests = list((REPORTS_PATH / "passed").iterdir()) - failed_tests = list((REPORTS_PATH / "failed").iterdir()) + passed_tests = list((TESTREPORT_PATH / "passed").iterdir()) + failed_tests = list((TESTREPORT_PATH / "failed").iterdir()) + new_tests = list((TESTREPORT_PATH / "new").iterdir()) + + actual_hashes = { + result.test.id: result.actual_hash for result in TestResult.recent_tests() + } title = "UI Test report " + datetime.now().strftime("%Y-%m-%d %H:%M:%S") doc = document(title=title, index=True) @@ -120,15 +121,18 @@ def index() -> Path: t.span("marked BAD", style="color: darkred") t.button("clear", onclick="resetState('bad')") - html.report_links(failed_tests, REPORTS_PATH, ACTUAL_HASHES) + html.report_links(failed_tests, TESTREPORT_PATH, actual_hashes) + + h2("New tests", style="color: blue;") + html.report_links(new_tests, TESTREPORT_PATH) h2("Passed", style="color: green;") - html.report_links(passed_tests, REPORTS_PATH) + html.report_links(passed_tests, TESTREPORT_PATH) - return html.write(REPORTS_PATH, doc, "index.html") + return html.write(TESTREPORT_PATH, doc, "index.html") -def all_screens(test_case_dirs: List[Path]) -> Path: +def all_screens() -> Path: """Generate an HTML file for all the screens from the current test run. Shows all test-cases at one place. @@ -141,24 +145,33 @@ def all_screens(test_case_dirs: List[Path]) -> Path: hr() count = 0 - for test_case_dir in test_case_dirs: - test_case_name = test_case_dir.name - h2(test_case_name, id=test_case_name) - actual_dir = test_case_dir / "actual" - for png in sorted(actual_dir.rglob("*.png")): + result_count = 0 + for result in TestResult.recent_tests(): + result_count += 1 + h2(result.test.id, id=result.test.id) + for image in result.images: # Including link to each image to see where else it occurs. 
- png_hash = _img_hash(png) - with a(href=f"{ALL_UNIQUE_SCREENS}#{png_hash}"): - html.image_raw(png, _image_width(test_case_name)) + with a(href=f"{ALL_UNIQUE_SCREENS}#{image}"): + html.image_link(image, TESTREPORT_PATH) count += 1 - h2(f"{count} screens from {len(test_case_dirs)} testcases.") + h2(f"{count} screens from {result_count} testcases.") - return html.write(REPORTS_PATH, doc, ALL_SCREENS) + return html.write(TESTREPORT_PATH, doc, ALL_SCREENS) -def all_unique_screens(test_case_dirs: List[Path]) -> Path: +def all_unique_screens() -> Path: """Generate an HTML file with all the unique screens from the current test run.""" + results = TestResult.recent_tests() + result_count = 0 + test_cases = defaultdict(list) + for result in results: + result_count += 1 + for image in result.images: + test_cases[image].append(result.test.id) + + test_case_pairs = sorted(test_cases.items(), key=lambda x: len(x[1]), reverse=True) + title = "All unique screens" doc = dominate.document(title=title) @@ -166,106 +179,66 @@ def all_unique_screens(test_case_dirs: List[Path]) -> Path: h1("All unique screens") hr() - screen_hashes: Dict[str, List[Path]] = defaultdict(list) - hash_images: Dict[str, Path] = {} - - # Adding all unique images onto the page - for test_case_dir in test_case_dirs: - actual_dir = test_case_dir / "actual" - for png in sorted(actual_dir.rglob("*.png")): - png_hash = _img_hash(png) - if png_hash not in screen_hashes: - # Adding link to the appropriate hash, where other testcases - # with the same hash (screen) are listed. - with a(href=f"#{png_hash}"): - with span(id=png_hash[:8]): - html.image_raw(png, _image_width(test_case_dir.name)) - - screen_hashes[png_hash].append(test_case_dir) - hash_images[png_hash] = png + for hash, tests in test_case_pairs: + # Adding link to the appropriate hash, where other testcases + # with the same hash (screen) are listed. + with a(href=f"#{hash}"): + with span(id="l-" + hash): + html.image_link( + hash, TESTREPORT_PATH, title=f"{len(tests)} testcases)" + ) # Adding all screen hashes together with links to testcases having these screens. - for png_hash, test_cases in screen_hashes.items(): - h2(png_hash) - with div(id=png_hash): - # Showing the exact image as well (not magnifying it) - with a(href=f"#{png_hash[:8]}"): - html.image_raw(hash_images[png_hash]) - for case in test_cases: + for hash, tests in test_case_pairs: + h2(hash) + with div(id=hash): + with a(href=f"#l-{hash}"): + html.image_link(hash, TESTREPORT_PATH) + for case in tests: # Adding link to each test-case - with a(href=f"{ALL_SCREENS}#{case.name}"): - p(case.name.split("/")[-1]) + with a(href=f"{ALL_SCREENS}#{case}"): + p(case) - h2(f"{len(screen_hashes)} unique screens from {len(test_case_dirs)} testcases.") + h2(f"{len(test_case_pairs)} unique screens from {result_count} testcases.") - return html.write(REPORTS_PATH, doc, ALL_UNIQUE_SCREENS) + return html.write(TESTREPORT_PATH, doc, ALL_UNIQUE_SCREENS) def generate_reports() -> None: """Generate HTML reports for the test.""" + html.set_image_dir(IMAGES_PATH) index() + all_screens() + all_unique_screens() - # To only get screens from the last running test-cases, - # we need to get the list of all directories with screenshots. 
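(Aside for illustration, not part of the patch.) The unique-screens page now builds its index from the per-test metadata instead of re-hashing every PNG: each image hash is mapped to the test cases that produced it, and the most widely shared screens are listed first. A toy version of that grouping:

    from collections import defaultdict

    # hypothetical test ids and their screen hashes
    results = {
        "TT-device_tests-test_a": ["h1", "h2"],
        "TT-device_tests-test_b": ["h1", "h3"],
    }

    test_cases = defaultdict(list)
    for test_id, images in results.items():
        for image in images:
            test_cases[image].append(test_id)

    by_popularity = sorted(test_cases.items(), key=lambda x: len(x[1]), reverse=True)
    assert by_popularity[0] == ("h1", ["TT-device_tests-test_a", "TT-device_tests-test_b"])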
- current_testcases = _get_testcases_dirs() - all_screens(current_testcases) - all_unique_screens(current_testcases) - - -def _img_hash(img: Path) -> str: - """Return the hash of the image.""" - content = img.read_bytes() - return hashlib.md5(content).hexdigest() - - -def _get_testcases_dirs() -> List[Path]: - """Get the list of test-cases dirs that the current test was running.""" - current_testcases = _get_all_current_testcases() - all_test_cases_dirs = [ - case - for case in (RECORDED_SCREENS_PATH).iterdir() - if case.name in current_testcases - ] - return sorted(all_test_cases_dirs) - - -def _get_all_current_testcases() -> Set[str]: - """Get names of all current test-cases. - Equals to the names of HTML files in the reports dir. - """ - passed_tests = list((REPORTS_PATH / "passed").glob("*.html")) - failed_tests = list((REPORTS_PATH / "failed").glob("*.html")) - return {test.stem for test in (passed_tests + failed_tests)} +def _copy_deduplicated(test: TestCase) -> None: + """Copy the actual screenshots to the deduplicated dir.""" + html.store_images(*test.actual_screens) + html.store_images(*test.recorded_screens) -def failed( - fixture_test_path: Path, test_name: str, actual_hash: str, expected_hash: str -) -> Path: +def failed(result: TestResult) -> Path: """Generate an HTML file for a failed test-case. Compares the actual screenshots to the expected ones. """ - ACTUAL_HASHES[test_name] = actual_hash - - doc = document(title=test_name, actual_hash=actual_hash) - recorded_path = fixture_test_path / "recorded" - actual_path = fixture_test_path / "actual" - download_failed = False - if not recorded_path.exists(): - recorded_path.mkdir() - try: - download.fetch_recorded(expected_hash, recorded_path) - except Exception: - download_failed = True + if not result.test.recorded_dir.exists(): + result.test.recorded_dir.mkdir() + + if result.expected_hash: + try: + download.fetch_recorded(result.expected_hash, result.test.recorded_dir) + except Exception: + download_failed = True - recorded_screens = sorted(recorded_path.iterdir()) - actual_screens = sorted(actual_path.iterdir()) + _copy_deduplicated(result.test) + doc = document(title=result.test.id, actual_hash=result.actual_hash) with doc: - _header(test_name, expected_hash, actual_hash) + _header(result.test.id, result.expected_hash, result.actual_hash) with div(id="markbox", _class="script-hidden"): p("Click a button to mark the test result as:") @@ -284,39 +257,39 @@ def failed( th("Expected") th("Actual") - html.diff_table( - recorded_screens, - actual_screens, - _image_width(test_name), - ) + html.diff_table(result.diff_lines(), TESTREPORT_PATH / "failed") - return html.write(REPORTS_PATH / "failed", doc, test_name + ".html") + return html.write(TESTREPORT_PATH / "failed", doc, result.test.id + ".html") -def passed(fixture_test_path: Path, test_name: str, actual_hash: str) -> Path: +def passed(result: TestResult) -> Path: """Generate an HTML file for a passed test-case.""" - copy_tree(str(fixture_test_path / "actual"), str(fixture_test_path / "recorded")) + return recorded(result, header="Passed") + - return recorded(fixture_test_path / "actual", test_name, actual_hash) +def missing(result: TestResult) -> Path: + """Generate an HTML file for a newly seen test-case.""" + return recorded(result, header="New testcase") -def recorded(fixture_test_path: Path, test_name: str, actual_hash: str) -> Path: +def recorded(result: TestResult, header: str = "Recorded") -> Path: """Generate an HTML file for a passed test-case. 
Shows all the screens from it in exact order. """ - doc = document(title=test_name) - actual_screens = sorted(fixture_test_path.iterdir()) + _copy_deduplicated(result.test) + + doc = document(title=result.test.id) with doc: - _header(test_name, actual_hash, actual_hash) + _header(result.test.id, result.actual_hash, result.actual_hash) with table(border=1): with tr(): - th("Recorded") + th(header) - for screen in actual_screens: + for screen in result.images: with tr(): - html.image_column(screen, _image_width(test_name)) + html.image_column(screen, TESTREPORT_PATH / "new") - return html.write(REPORTS_PATH / "passed", doc, test_name + ".html") + return html.write(TESTREPORT_PATH / "new", doc, result.test.id + ".html") diff --git a/tests/ui_tests/reporting/reports/.keep b/tests/ui_tests/reports/.keep similarity index 100% rename from tests/ui_tests/reporting/reports/.keep rename to tests/ui_tests/reports/.keep diff --git a/tests/update_fixtures.py b/tests/update_fixtures.py index 3f4848e1f..2632a49b7 100755 --- a/tests/update_fixtures.py +++ b/tests/update_fixtures.py @@ -1,7 +1,16 @@ #!/usr/bin/env python3 +import click -from ui_tests import update_fixtures_with_diff +from ui_tests import update_fixtures -changes_amount = update_fixtures_with_diff() -print(f"{changes_amount} hashes updated in fixtures.json file.") +@click.command() +@click.option("-r", "--remove-missing", is_flag=True, help="Remove missing tests") +def main(remove_missing: bool) -> None: + """Update fixtures file with results from latest test run.""" + changes_amount = update_fixtures(remove_missing) + print(f"Updated fixtures.json with data from {changes_amount} tests.") + + +if __name__ == "__main__": + main()
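(Aside for illustration, not from the patch.) With per-test metadata.json files persisted under tests/ui_tests/screens/, accepting UI changes no longer relies on a separate fixtures.json.diff file. A hedged sketch of the intended flow, assuming the usual invocation from the repository root after a UI test run:

    # 1. run the suite and write screens/<test-id>/metadata.json, e.g.:
    #      pytest tests/device_tests --ui=test
    # 2. accept all recent results (-r / --remove-missing also drops fixtures
    #    for tests that did not run):
    #      python tests/update_fixtures.py
    # which is roughly equivalent to (with tests/ on sys.path, as the script arranges):
    from ui_tests import update_fixtures

    count = update_fixtures(remove_missing=False)
    print(f"Updated fixtures.json with data from {count} tests.")

The resulting fixtures.json is nested by model and test group, e.g. {"TT": {"device_tests": {"TT_bitcoin-test_signtx.py::test_one_one_fee": "<sha256>"}}}, where the hash is what _hash_files() computes over the pixel data of all recorded screens.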