From 5572b932fff83b7a23ae5b109b06a74cf27f7485 Mon Sep 17 00:00:00 2001
From: Martin Milata <martin@martinmilata.cz>
Date: Tue, 13 Feb 2024 23:49:41 +0100
Subject: [PATCH] docs(core): document translations blob format

[skip_ci]
---
 docs/SUMMARY.md                |  1 +
 docs/core/misc/index.md        |  1 +
 docs/core/misc/translations.md | 74 ++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+)
 create mode 100644 docs/core/misc/translations.md

diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 9795b1cf8..a9de23f72 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -18,6 +18,7 @@
     - [SLIP-39](core/misc/slip0039.md)
     - [Exceptions usage](core/misc/exceptions.md)
     - [Memory fragmentation management](core/misc/fragmentation.md)
+    - [Translation data format](core/misc/translations.md)
     - [DISC1](core/misc/disc1.md)
 - [Legacy](legacy/index.md)
     - [Firmware format](legacy/firmware-format.md)
diff --git a/docs/core/misc/index.md b/docs/core/misc/index.md
index 6e29523b7..fe2ea83d9 100644
--- a/docs/core/misc/index.md
+++ b/docs/core/misc/index.md
@@ -7,3 +7,4 @@ Topics that do not fit elsewhere:
 - [Exceptions usage](exceptions.md)
 - [Memory fragmentation management](fragmentation.md)
 - [Running Trezor firmware on STM32F429I-DISC1](disc1.md)
+- [Translation data format](translations.md)
diff --git a/docs/core/misc/translations.md b/docs/core/misc/translations.md
new file mode 100644
index 000000000..7c3c7b47d
--- /dev/null
+++ b/docs/core/misc/translations.md
@@ -0,0 +1,74 @@
+# Translations blob format
+
+| offset | length             | name              | description                                       | hash   |
+|-------:|-------------------:|-------------------|---------------------------------------------------|--------|
+| 0x0000 |                  6 | magic             | blob magic `TRTR00`                               |        |
+| 0x0006 |                  2 | container\_len    | total length (up to padding)                      |        |
+| 0x0008 |                  2 | header\_len       | header length                                     |        |
+| 0x000A |                  2 | header\_magic     | header magic `TR`                                 |        |
+| 0x000C |                  8 | language\_tag     | BCP 47 language tag (e.g. `cs-CZ`, `en-US`, ...)  | header |
+| 0x0014 |                  4 | version           | 4 bytes of version (major, minor, patch, build)   | header |
+| 0x0018 |                  2 | data\_len         | length of the raw data, i.e. translations + fonts | header |
+| 0x001A |                 32 | data\_hash        | SHA-256 hash of the data                          | header |
+| 0x003A |  `header_len - 48` | ignored           | reserved for forward compatibility                | header |
+|      ? |                  2 | proof\_len        | length of Merkle proof and signature in bytes     |        |
+|      ? |                  1 | proof\_count      | number of Merkle proof items following            |        |
+|      ? | `proof_count * 32` | proof             | array of SHA-256 hashes                           |        |
+|      ? |                  1 | sig\_mask         | CoSi signature mask                               |        |
+|      ? |                 64 | sig               | Ed25519 CoSi signature of Merkle root             |        |
+|      ? |                  2 | translations\_len | length of the translated strings                  | data   |
+|      ? | `translations_len` | translations      | translated string data                            | data   |
+|      ? |                  2 | fonts\_len        | length of the font data                           | data   |
+|      ? |        `fonts_len` | fonts             | font data                                         | data   |
+|      ? |                  ? | padding           | `0xff` bytes padding to flash sector boundary     |        |
+
+## Translation data
+
+Each offset points into the `strings` field; a string extends up to the next
+offset. The first offset is always 0 and each following offset must be equal or
+greater than the previous one (equal offsets denote an empty string).
+
+| offset | length                               | name              | description                                            |
+|-------:|-------------------------------------:|-------------------|--------------------------------------------------------|
+| 0x0000 | 2                                    | count             | number of offsets, excluding the sentinel              |
+| 0x0002 | 2                                    | offset[0]         | offset of string id 0 in the `strings` field           |
+| ...    | 2                                    | ...               |                                                        |
+| ?      | 2                                    | offset[count - 1] | offset of string id `count - 1` in the `strings` field |
+| ?      | 2                                    | offset[count]     | offset past the last element                           |
+| ?      | `translations_len - 2 * (count + 2)` | strings           | concatenation of UTF-8 strings                         |
+
+## Fonts
+
+Ids must be in increasing order, offsets must be in non-decreasing order. First
+offset must be 0.
+
+| offset | length                               | name              | description                                                 |
+|-------:|-------------------------------------:|-------------------|-------------------------------------------------------------|
+| 0x0000 | 2                                    | count             | number of items in the offset table, excluding the sentinel |
+| 0x0002 | 2                                    | id[0]             | numeric id of the first font                                |
+| 0x0004 | 2                                    | offset[0]         | offset of the first font in the `fonts` field               |
+| ...    | ...                                  | ...               |                                                             |
+| ?      | 2                                    | id[count - 1]     | numeric id of the last font                                 |
+| ?      | 2                                    | offset[count - 1] | offset of the last font in the `fonts` field                |
+| ?      | 2                                    | sentinel\_id      | sentinel `0xffff`                                           |
+| ?      | 2                                    | sentinel\_offset  | offset past the end of last element                         |
+| ?      | ?                                    | fonts             | concatenation of fonts, format defined in the next section  |
+| ?      | 0-3                                  | padding           | padding (any value) for alignment purposes                  |
+
+## Font data
+
+The format is exactly the same as in the previous table; the only difference is
+the interpretation of the payload.
+
+| offset | length                               | name              | description                                                 |
+|-------:|-------------------------------------:|-------------------|-------------------------------------------------------------|
+| 0x0000 | 2                                    | count             | number of items in the offset table, excluding the sentinel |
+| 0x0002 | 2                                    | id[0]             | id (Unicode code point) of the first glyph                  |
+| 0x0004 | 2                                    | offset[0]         | offset of the first glyph in the `glyphs` field             |
+| ...    | ...                                  | ...               |                                                             |
+| ?      | 2                                    | id[count - 1]     | id (Unicode code point) of the last glyph                   |
+| ?      | 2                                    | offset[count - 1] | offset of the last glyph in the `glyphs` field              |
+| ?      | 2                                    | sentinel\_id      | sentinel `0xffff`                                           |
+| ?      | 2                                    | sentinel\_offset  | offset past the end of last element                         |
+| ?      | ?                                    | glyphs            | concatenation of glyph bitmaps                              |
+| ?      | 0-3                                  | padding           | padding (any value) for alignment purposes                  |