From 76e73f684a497779d0515fef56f9a6813e60133d Mon Sep 17 00:00:00 2001 From: cepetr Date: Fri, 7 Feb 2025 11:50:53 +0100 Subject: [PATCH] feat(core): introduce hardware jpeg decoder [no changelog] --- core/embed/gfx/bitblt/dma2d_bitblt.h | 9 +- core/embed/gfx/bitblt/stm32/dma2d_bitblt.c | 61 +++ core/embed/gfx/inc/gfx/jpegdec.h | 127 +++++ core/embed/gfx/jpegdec/stm32u5/jpegdec.c | 474 ++++++++++++++++++ core/embed/gfx/terminal.c | 1 + core/embed/rust/Cargo.toml | 10 +- core/embed/rust/build.rs | 14 +- core/embed/rust/src/trezorhal/jpegdec.rs | 204 ++++++++ core/embed/rust/src/trezorhal/mod.rs | 3 + core/embed/rust/src/ui/component/mod.rs | 16 +- .../rust/src/ui/shape/cache/drawing_cache.rs | 39 +- .../rust/src/ui/shape/cache/jpeg_cache.rs | 2 +- core/embed/rust/src/ui/shape/cache/mod.rs | 2 +- core/embed/rust/src/ui/shape/jpeg.rs | 82 +-- core/embed/rust/src/ui/shape/mod.rs | 4 +- core/embed/rust/trezorhal.h | 4 + .../sys/syscall/stm32/syscall_dispatch.c | 29 ++ .../embed/sys/syscall/stm32/syscall_numbers.h | 11 +- core/embed/sys/syscall/stm32/syscall_stubs.c | 36 ++ .../sys/syscall/stm32/syscall_verifiers.c | 44 ++ .../sys/syscall/stm32/syscall_verifiers.h | 19 +- .../models/T3W1/trezor_t3w1_revA.py | 6 + .../models/T3W1/trezor_t3w1_revA0.py | 10 +- .../models/T3W1/trezor_t3w1_revB.py | 6 + core/site_scons/ui/ui_bolt.py | 2 +- core/site_scons/ui/ui_delizia.py | 2 +- 26 files changed, 1148 insertions(+), 69 deletions(-) create mode 100644 core/embed/gfx/inc/gfx/jpegdec.h create mode 100644 core/embed/gfx/jpegdec/stm32u5/jpegdec.c create mode 100644 core/embed/rust/src/trezorhal/jpegdec.rs diff --git a/core/embed/gfx/bitblt/dma2d_bitblt.h b/core/embed/gfx/bitblt/dma2d_bitblt.h index 2f6bca6871..e98263301b 100644 --- a/core/embed/gfx/bitblt/dma2d_bitblt.h +++ b/core/embed/gfx/bitblt/dma2d_bitblt.h @@ -17,8 +17,7 @@ * along with this program. If not, see . 
*/ -#ifndef TREZORHAL_DMA2D_BITBLT_H -#define TREZORHAL_DMA2D_BITBLT_H +#pragma once #include @@ -50,4 +49,8 @@ bool dma2d_rgba8888_copy_rgba8888(const gfx_bitblt_t* bb); bool dma2d_rgba8888_blend_mono4(const gfx_bitblt_t* bb); bool dma2d_rgba8888_blend_mono8(const gfx_bitblt_t* bb); -#endif // TREZORHAL_DMA2D_BITBLT_H +#ifdef USE_HW_JPEG_DECODER +bool dma2d_rgba8888_copy_ycbcr420(const gfx_bitblt_t* bb); +bool dma2d_rgba8888_copy_ycbcr422(const gfx_bitblt_t* bb); +bool dma2d_rgba8888_copy_ycbcr444(const gfx_bitblt_t* bb); +#endif diff --git a/core/embed/gfx/bitblt/stm32/dma2d_bitblt.c b/core/embed/gfx/bitblt/stm32/dma2d_bitblt.c index b7778884ac..cf682c0199 100644 --- a/core/embed/gfx/bitblt/stm32/dma2d_bitblt.c +++ b/core/embed/gfx/bitblt/stm32/dma2d_bitblt.c @@ -789,3 +789,64 @@ bool dma2d_rgba8888_copy_rgba8888(const gfx_bitblt_t* bb) { bb->width, bb->height); return true; } + +#ifdef USE_HW_JPEG_DECODER +static bool dma2d_rgba8888_copy_ycbcr(const gfx_bitblt_t* bb, uint32_t css) { + dma2d_driver_t* drv = &g_dma2d_driver; + + if (!drv->initialized) { + return false; + } + + dma2d_wait(); + + if (!dma2d_accessible(bb->dst_row) || !dma2d_accessible(bb->src_row)) { + return false; + } + + drv->handle.Init.ColorMode = DMA2D_OUTPUT_ARGB8888; + drv->handle.Init.Mode = DMA2D_M2M_PFC; + drv->handle.Init.OutputOffset = bb->dst_stride / sizeof(uint32_t) - bb->width; + HAL_DMA2D_Init(&drv->handle); + + drv->handle.LayerCfg[1].InputColorMode = DMA2D_INPUT_YCBCR; + drv->handle.LayerCfg[1].InputOffset = 0; + drv->handle.LayerCfg[1].ChromaSubSampling = css; + drv->handle.LayerCfg[1].AlphaMode = 0; + drv->handle.LayerCfg[1].InputAlpha = 0; + HAL_DMA2D_ConfigLayer(&drv->handle, 1); + + HAL_DMA2D_Start(&drv->handle, (uint32_t)bb->src_row, + (uint32_t)bb->dst_row + bb->dst_x * sizeof(uint32_t), + bb->width, bb->height); + + // DMA2D overwrites CLUT during YCbCr conversion + // (seems to be a bug or an undocumented feature) + drv->clut_valid = false; + + return true; +} + +bool 
dma2d_rgba8888_copy_ycbcr420(const gfx_bitblt_t* bb) { + return dma2d_rgba8888_copy_ycbcr(bb, DMA2D_CSS_420); +} + +bool dma2d_rgba8888_copy_ycbcr422(const gfx_bitblt_t* bb) { + return dma2d_rgba8888_copy_ycbcr(bb, DMA2D_CSS_422); +} + +bool dma2d_rgba8888_copy_ycbcr444(const gfx_bitblt_t* bb) { + return dma2d_rgba8888_copy_ycbcr(bb, DMA2D_NO_CSS); +} + +// Temporary hack to invalidate CLUT cache used in jpeg decoder. +// This function should be removed in the future with DMA2D syscalls. +void dma2d_invalidate_clut(void) { + dma2d_driver_t* drv = &g_dma2d_driver; + if (!drv->initialized) { + return; + } + drv->clut_valid = false; +} + +#endif // USE_HW_JPEG_DECODER diff --git a/core/embed/gfx/inc/gfx/jpegdec.h b/core/embed/gfx/inc/gfx/jpegdec.h new file mode 100644 index 0000000000..73d14e318f --- /dev/null +++ b/core/embed/gfx/inc/gfx/jpegdec.h @@ -0,0 +1,127 @@ +/* + * This file is part of the Trezor project, https://trezor.io/ + * + * Copyright (c) SatoshiLabs + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include + +// Maximum number of blocks (8x8) in a slice. +// The more blocks we use, the faster the decoder is. +// Minimum value is 4 to support 4:2:0 subsampling (MCU is 16x16). 
+#define JPEGDEC_MAX_SLICE_BLOCKS 16 + +// Size of Y/YCbCr data buffer +// The worst case is 192 bytes per block (8x8 pixels) for 4:4:4 subsampling +#define JPEGDEC_YCBCR_BUFFER_SIZE (JPEGDEC_MAX_SLICE_BLOCKS * 8 * 8 * 3) + +// Maximum size of the RGBA8888 buffer for a slice. +#define JPEGDEC_RGBA8888_BUFFER_SIZE (JPEGDEC_MAX_SLICE_BLOCKS * 8 * 8 * 4) + +typedef struct jpegdec jpegdec_t; + +typedef enum { + // Decoder needs more data + // (jpegdec_process should be called with more data) + JPEGDEC_STATE_NEED_DATA, + // Image info is ready + // (jpegdec_get_info can be called to get the image info) + JPEGDEC_STATE_INFO_READY, + // Decoded slice is ready + // (jpegdec_get_slice_rgba8888 can be called to get the slice data) + JPEGDEC_STATE_SLICE_READY, + // Decoding is finished + JPEGDEC_STATE_FINISHED, + // Error occurred, decoding is stopped + JPEGDEC_STATE_ERROR, +} jpegdec_state_t; + +typedef enum { + JPEGDEC_IMAGE_GRAYSCALE, // Gray scale image + JPEGDEC_IMAGE_YCBCR420, // Color image with 4:2:0 subsampling + JPEGDEC_IMAGE_YCBCR422, // Color image with 4:2:2 subsampling + JPEGDEC_IMAGE_YCBCR444, // Color image with 4:4:4 subsampling +} jpegdec_image_format_t; + +typedef struct { + // Pointer to the data + const uint8_t* data; + // Size of the data in bytes + size_t size; + // Current offset in the data + size_t offset; + // Set to true when no more data is available + bool last_chunk; +} jpegdec_input_t; + +typedef struct { + // Image format + jpegdec_image_format_t format; + // Image width in pixels + int16_t width; + // Image height in pixels + int16_t height; +} jpegdec_image_t; + +typedef struct { + // Slice x-coordinate + int16_t x; + // Slice y-coordinate + int16_t y; + // Slice width + int16_t width; + // Slice height + int16_t height; +} jpegdec_slice_t; + +// Initialize and reset the decoder internal state +bool jpegdec_open(void); + +// Release the decoder and free resources +void jpegdec_close(void); + +// Process all or part of the input buffer and 
advances the `input->offset` +// +// `input->offset` must be aligned to 4 bytes. +// `input->size` must be aligned to 4 bytes except for the last chunk. +// `input->last_chunk` must be set to true when no more data is available. +// +// Returns the current state of the decoder: +// - `JPEGDEC_STATE_NEED_DATA` - more data is needed +// - `JPEGDEC_STATE_INFO_READY` - the image info is ready +// - `JPEGDEC_STATE_SLICE_READY` - a decoded slice is ready +// - `JPEGDEC_STATE_FINISHED` - the decoding is finished +// - `JPEGDEC_STATE_ERROR` - an error occurred +jpegdec_state_t jpegdec_process(jpegdec_input_t* input); + +// Get the decoded image info +// +// Can be called anytime after the decoder went through the +// `JPEGDEC_STATE_INFO_READY` state. +// +// Returns true if the info is available +bool jpegdec_get_info(jpegdec_image_t* info); + +// Copy the last decoded slice to the buffer +// +// `rgba8888` must be a buffer of at least `JPEGDEC_RGBA8888_BUFFER_SIZE` +// bytes and must be aligned to 4 bytes. +// +// Can be called immediately after `jpegdec_process` returns +// `JPEGDEC_STATE_SLICE_READY`. +bool jpegdec_get_slice_rgba8888(uint32_t* rgba8888, jpegdec_slice_t* slice); diff --git a/core/embed/gfx/jpegdec/stm32u5/jpegdec.c b/core/embed/gfx/jpegdec/stm32u5/jpegdec.c new file mode 100644 index 0000000000..6d1cd09735 --- /dev/null +++ b/core/embed/gfx/jpegdec/stm32u5/jpegdec.c @@ -0,0 +1,474 @@ +/* + * This file is part of the Trezor project, https://trezor.io/ + * + * Copyright (c) SatoshiLabs + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifdef KERNEL_MODE + +#include +#include + +#include +#include +#include + +#include <../bitblt/dma2d_bitblt.h> + +#include + +// Fixes of STMicro bugs in cmsis-device-u5 +#undef JPEG_BASE_S +#define JPEG_BASE_S (AHB1PERIPH_BASE_S + 0x0A000UL) +#undef JPEG_CFR_CEOCF_Pos +#define JPEG_CFR_CEOCF_Pos (5U) +#undef JPEG_CFR_CHPDF_Pos +#define JPEG_CFR_CHPDF_Pos (6U) + +// JPEG decoder processing timeout in microseconds. +// The timeout must be selected to be long enough to process a single slice. +// 100us @ 160MHZ CPU clock speed => 8000 CPU cycles +// JPEG decoder issues 1pixel/1cycles => 125 8x8 blocks +#define JPEGDEC_PROCESSING_TIMEOUT_US 100 + +// JPEG decoder state +struct jpegdec { + // Set if the decoder is in use + bool inuse; + + // DMA channel for JPEG data output + DMA_HandleTypeDef hdma; + + // Current state of the FSM + jpegdec_state_t state; + // Decoded image parameters + jpegdec_image_t image; + + // Decoded image MCU width + int16_t mcu_width; + // Decoded image MCU height + int16_t mcu_height; + // Decoded image MCU size in bytes + size_t mcu_size; + + // Decoded YCbCr data for the current slice + uint32_t ycbcr_buffer[JPEGDEC_YCBCR_BUFFER_SIZE / sizeof(uint32_t)]; + + // Current slice x-coordinate + int16_t slice_x; + // Current slice y-coordinate + int16_t slice_y; + // Current slice width + int16_t slice_width; + // Current slice height + int16_t slice_height; +}; + +// JPEG decoder instance +jpegdec_t g_jpegdec = { + .inuse = false, +}; + +bool jpegdec_open(void) { + jpegdec_t *dec = &g_jpegdec; + + if (dec->inuse) { + return false; + } + + memset(dec, 0, sizeof(jpegdec_t)); + dec->inuse = true; + + __HAL_RCC_JPEG_FORCE_RESET(); + __HAL_RCC_JPEG_RELEASE_RESET(); + __HAL_RCC_JPEG_CLK_ENABLE(); + + // Configure JPEG codec for decoding and header parsing + JPEG->CR |= 
JPEG_CR_JCEN; + JPEG->CONFR1 |= JPEG_CONFR1_DE; + JPEG->CONFR1 |= JPEG_CONFR1_HDR; + JPEG->CONFR0 |= JPEG_CONFR0_START; + JPEG->CR |= JPEG_CR_OFF | JPEG_CR_IFF; + + // Configure DMA channel for JPEG data output + __HAL_RCC_GPDMA1_CLK_ENABLE(); + dec->hdma.Instance = GPDMA1_Channel4; + dec->hdma.Init.Request = GPDMA1_REQUEST_JPEG_TX; + dec->hdma.Init.BlkHWRequest = DMA_BREQ_SINGLE_BURST; + dec->hdma.Init.Direction = DMA_PERIPH_TO_MEMORY; + dec->hdma.Init.SrcInc = DMA_SINC_FIXED; + dec->hdma.Init.DestInc = DMA_DINC_INCREMENTED; + dec->hdma.Init.SrcDataWidth = DMA_SRC_DATAWIDTH_WORD; + dec->hdma.Init.DestDataWidth = DMA_DEST_DATAWIDTH_WORD; + dec->hdma.Init.Priority = DMA_LOW_PRIORITY_LOW_WEIGHT; + dec->hdma.Init.SrcBurstLength = 8; + dec->hdma.Init.DestBurstLength = 8; + dec->hdma.Init.TransferAllocatedPort = + DMA_SRC_ALLOCATED_PORT1 | DMA_DEST_ALLOCATED_PORT0; + dec->hdma.Init.TransferEventMode = DMA_TCEM_BLOCK_TRANSFER; + dec->hdma.Init.Mode = DMA_NORMAL; + + if (HAL_DMA_Init(&dec->hdma) != HAL_OK) { + dec->hdma.Instance = NULL; + goto cleanup; + } + + if (HAL_DMA_ConfigChannelAttributes( + &dec->hdma, DMA_CHANNEL_PRIV | DMA_CHANNEL_SEC | DMA_CHANNEL_SRC_SEC | + DMA_CHANNEL_DEST_SEC) != HAL_OK) { + goto cleanup; + } + + return true; + +cleanup: + jpegdec_close(); + return false; +} + +void jpegdec_close(void) { + jpegdec_t *dec = &g_jpegdec; + + if (dec->hdma.Instance != NULL) { + HAL_DMA_Abort(&dec->hdma); + HAL_DMA_DeInit(&dec->hdma); + } + + __HAL_RCC_JPEG_CLK_DISABLE(); + __HAL_RCC_JPEG_FORCE_RESET(); + __HAL_RCC_JPEG_RELEASE_RESET(); + + memset(dec, 0, sizeof(jpegdec_t)); +} + +#define READ_REG_FIELD(reg, field) (((reg) & field##_Msk) >> field##_Pos) + +// Extracts image parameters from the JPEG codec registers +// and set `dec->image` and `dec->mcu_xxx` fields +static bool jpegdec_extract_header_info(jpegdec_t *dec) { + jpegdec_image_t image = {0}; + size_t mcu_size = 64; // Grayscale, 8x8 blocks + int16_t mcu_width = 8; + int16_t mcu_height = 8; + + 
image.height = READ_REG_FIELD(JPEG->CONFR1, JPEG_CONFR1_YSIZE); + image.width = READ_REG_FIELD(JPEG->CONFR3, JPEG_CONFR3_XSIZE); + + if (image.height == 0 || image.width == 0) { + // Image size is zero, invalid header + return false; + } + + if (image.height > 32767 || image.width > 32767) { + // Image is too large + return false; + } + + // Number of quantization tables + int n_qt = 1 + READ_REG_FIELD(JPEG->CONFR1, JPEG_CONFR1_NF); + + if (n_qt == 1) { + // 1 quantization table => Grayscale + image.format = JPEGDEC_IMAGE_GRAYSCALE; + } else if (n_qt == 3) { + // 3 quantization table => YCbCr + int y_blocks = 1 + READ_REG_FIELD(JPEG->CONFR4, JPEG_CONFR4_NB); + int cb_blocks = 1 + READ_REG_FIELD(JPEG->CONFR5, JPEG_CONFR5_NB); + int cr_blocks = 1 + READ_REG_FIELD(JPEG->CONFR6, JPEG_CONFR6_NB); + + mcu_size = (y_blocks + cb_blocks + cr_blocks) * 64; + mcu_width = (y_blocks == 1) ? 8 : 16; + mcu_height = (y_blocks == 4) ? 16 : 8; + + if (y_blocks == 2 && cb_blocks == 1 && cr_blocks == 1) { + // 4:2:2 subsampling + image.format = JPEGDEC_IMAGE_YCBCR422; + } else if (y_blocks == 4 && cb_blocks == 1 && cr_blocks == 1) { + // 4:2:0 subsampling + image.format = JPEGDEC_IMAGE_YCBCR420; + } else if (y_blocks == 1 && cb_blocks == 1 && cr_blocks == 1) { + // 4:4:4 subsampling + image.format = JPEGDEC_IMAGE_YCBCR444; + } else { + // Unsupported subsampling + return false; + } + } else { + // 2 or 4 quantization tables are not supported + return false; + } + + dec->image = image; + dec->mcu_size = mcu_size; + dec->mcu_width = mcu_width; + dec->mcu_height = mcu_height; + return true; +} + +// Starts DMA transfer of the decoded YCbCr data for the current slice +static bool jpegdec_start_dma_transfer(jpegdec_t *dec) { + // Number of MCUs that fit into the YCbCr buffer + int n_ycbcr = sizeof(dec->ycbcr_buffer) / dec->mcu_size; + // Number of MCUs that fit into the RGB buffer + int n_rgb = + JPEGDEC_MAX_SLICE_BLOCKS / ((dec->mcu_width * dec->mcu_height) / 64); + // Number of 
remaining MCUs in the current row + int n_row = + (dec->image.width - dec->slice_x + dec->mcu_width - 1) / dec->mcu_width; + // Number of MCUs to decode in the current slice + int mcu_count = MIN(MIN(n_ycbcr, n_rgb), n_row); + + dec->slice_width = dec->mcu_width * mcu_count; + dec->slice_height = dec->mcu_height; + + if (HAL_DMA_Start(&dec->hdma, (uint32_t)&JPEG->DOR, + (uint32_t)dec->ycbcr_buffer, + dec->mcu_size * mcu_count) != HAL_OK) { + return false; + } + + JPEG->CR |= JPEG_CR_ODMAEN; + return true; +} + +// Feeds the input FIFO with the data from the input buffer. +// Returns `true` if at least one word was written to the FIFO. +static inline bool jpegdec_feed_fifo(jpegdec_t *dec, jpegdec_input_t *inp) { + // Input FIFO needs data + uint32_t *ptr = (uint32_t *)&inp->data[inp->offset]; + if (inp->offset + 16 <= inp->size) { + // Feed the FIFO with 16 bytes + JPEG->DIR = ptr[0]; + JPEG->DIR = ptr[1]; + JPEG->DIR = ptr[2]; + JPEG->DIR = ptr[3]; + inp->offset += 16; + return true; + } else if (inp->offset < inp->size) { + // Feed the FIFO with the remaining data + while (inp->offset + 4 < inp->size) { + JPEG->DIR = *ptr++; + inp->offset += 4; + } + if (inp->offset < inp->size) { + size_t bits = (inp->size - inp->offset) * 8; + JPEG->DIR = *ptr & (0xFFFFFFFF >> (32 - bits)); + inp->offset = inp->size; + } + return true; + } + + return false; +} + +// Advances the slice coordinates to the next slice. +// Returns `true` if the decoding is complete. 
+static inline bool jpegdec_advance_slice_coordinates(jpegdec_t *dec) { + dec->slice_x += dec->slice_width; + if (dec->slice_x >= dec->image.width) { + dec->slice_x = 0; + dec->slice_y += dec->slice_height; + } + return dec->slice_y >= dec->image.height; +} + +jpegdec_state_t jpegdec_process(jpegdec_input_t *inp) { + jpegdec_t *dec = &g_jpegdec; + + if (!dec->inuse) { + return JPEGDEC_STATE_ERROR; + } + + // Check input buffer alignment + if (inp->offset < inp->size) { + if (!IS_ALIGNED(inp->offset, 4) || + (!IS_ALIGNED(inp->size, 4) && !inp->last_chunk)) { + return JPEGDEC_STATE_ERROR; + } + } + + switch (dec->state) { + case JPEGDEC_STATE_ERROR: + case JPEGDEC_STATE_FINISHED: + return dec->state; + + case JPEGDEC_STATE_SLICE_READY: + if (jpegdec_advance_slice_coordinates(dec)) { + dec->state = JPEGDEC_STATE_FINISHED; + return dec->state; + } + // pass through + case JPEGDEC_STATE_INFO_READY: + if (!jpegdec_start_dma_transfer(dec)) { + dec->state = JPEGDEC_STATE_ERROR; + return dec->state; + } + break; + + default: + break; + } + + uint64_t expire_time = 0; // = 0 => not active + bool timed_out = false; + uint8_t poll_counter = 0; + + for (;;) { + uint32_t sr = JPEG->SR; + + if ((sr & JPEG_SR_IFTF) != 0) { + if (jpegdec_feed_fifo(dec, inp)) { + expire_time = 0; + continue; // Feed the FIFO as fast as possible + } else if (!inp->last_chunk) { + dec->state = JPEGDEC_STATE_NEED_DATA; + break; + } + } + + if (__HAL_DMA_GET_FLAG(&dec->hdma, DMA_FLAG_TC)) { + // Clear status flags and prepare for the next transfer + HAL_DMA_PollForTransfer(&dec->hdma, HAL_DMA_FULL_TRANSFER, 0); + dec->state = JPEGDEC_STATE_SLICE_READY; + break; + } + + if ((sr & JPEG_SR_HPDF) != 0) { + // Header parsing is complete + // Clear the HPDF flag + JPEG->CFR |= JPEG_CFR_CHPDF; + bool unexpected_header = dec->image.width > 0; + if (unexpected_header || !jpegdec_extract_header_info(dec)) { + dec->state = JPEGDEC_STATE_ERROR; + } else { + dec->state = JPEGDEC_STATE_INFO_READY; + } + break; + } + 
+ // Timeout processing (especially `systick_us()`) is quite expensive + // and therefore it is done only every 16 passes. + if (poll_counter-- == 0) { + poll_counter = 16; + if (expire_time == 0) { + // The timeout handles two situations: + // 1) Invalid input data that causes the JPEG codec not to produce + // any output and the processing is stuck. + // 2) Unexpected JPEG codec stuck in the processing state. + expire_time = systick_us() + JPEGDEC_PROCESSING_TIMEOUT_US; + } else if (timed_out) { + dec->state = JPEGDEC_STATE_ERROR; + break; + } else { + // `timed_out` flag is checked in the next pass + timed_out = systick_us() > expire_time; + } + } + } + + if (dec->state == JPEGDEC_STATE_ERROR || + dec->state == JPEGDEC_STATE_FINISHED) { + JPEG->CR &= ~JPEG_CR_JCEN; + HAL_DMA_Abort(&dec->hdma); + } + + return dec->state; +} + +bool jpegdec_get_info(jpegdec_image_t *image) { + jpegdec_t *dec = &g_jpegdec; + + if (!dec->inuse) { + return false; + } + + if (dec->image.width == 0 || dec->image.height == 0) { + return false; + } + + *image = dec->image; + return true; +} + +bool jpegdec_get_slice_rgba8888(uint32_t *rgba8888, jpegdec_slice_t *slice) { + jpegdec_t *dec = &g_jpegdec; + + if (!dec->inuse) { + return false; + } + + if (dec->state != JPEGDEC_STATE_SLICE_READY) { + return false; + } + + if (!IS_ALIGNED((uint32_t)rgba8888, 4)) { + return false; + } + + slice->width = dec->slice_width; + slice->height = dec->slice_height; + slice->x = dec->slice_x; + slice->y = dec->slice_y; + + gfx_bitblt_t bb = { + .height = dec->slice_height, + .width = dec->slice_width, + .dst_row = rgba8888, + .dst_stride = dec->slice_width * 4, + .dst_x = 0, + .dst_y = 0, + .src_row = dec->ycbcr_buffer, + .src_stride = 0, + .src_x = 0, + .src_y = 0, + .src_fg = 0, + .src_bg = 0, + .src_alpha = 255, + }; + +#ifdef KERNEL + tz_set_dma2d_unpriv(false); +#endif + + switch (dec->image.format) { + case JPEGDEC_IMAGE_YCBCR420: + dma2d_rgba8888_copy_ycbcr420(&bb); + break; + case 
JPEGDEC_IMAGE_YCBCR422: + dma2d_rgba8888_copy_ycbcr422(&bb); + break; + case JPEGDEC_IMAGE_YCBCR444: + dma2d_rgba8888_copy_ycbcr444(&bb); + break; + case JPEGDEC_IMAGE_GRAYSCALE: + // Conversion from grayscale to RGBA8888 is not supported + return false; + default: + return false; + } + + // Wait until the DMA transfer is complete so that the caller can use + // data in the `rgba8888` buffer immediately. + dma2d_wait(); + +#ifdef KERNEL + tz_set_dma2d_unpriv(true); +#endif + + return true; +} + +#endif // KERNEL_MODE diff --git a/core/embed/gfx/terminal.c b/core/embed/gfx/terminal.c index 4de471bb0f..2e2b451e47 100644 --- a/core/embed/gfx/terminal.c +++ b/core/embed/gfx/terminal.c @@ -80,6 +80,7 @@ static void term_redraw_rows(int start_row, int row_count) { .src_stride = 8, .src_fg = terminal_fgcolor, .src_bg = terminal_bgcolor, + .src_alpha = 255, }; for (int y = start_row; y < start_row + row_count; y++) { diff --git a/core/embed/rust/Cargo.toml b/core/embed/rust/Cargo.toml index 1735c38e59..76bf8e0fc3 100644 --- a/core/embed/rust/Cargo.toml +++ b/core/embed/rust/Cargo.toml @@ -8,9 +8,9 @@ build = "build.rs" [features] default = ["layout_bolt"] crypto = ["zeroize"] -layout_bolt = ["jpeg"] +layout_bolt = [] layout_caesar = [] -layout_delizia = ["jpeg", "dma2d"] +layout_delizia = [] micropython = [] protobuf = ["micropython"] ui = [] @@ -22,16 +22,16 @@ display_rgba8888 = ["ui_antialiasing"] ui_debug = [] ui_antialiasing = [] ui_blurring = [] -ui_jpeg_decoder = ["jpeg"] ui_image_buffer = [] ui_color_32bit = [] ui_overlay = [] ui_empty_lock = [] +ui_jpeg = [] +hw_jpeg_decoder = [] bootloader = [] button = [] touch = [] clippy = [] -jpeg = [] debug = ["ui_debug"] sbu = [] haptic = [] @@ -56,7 +56,7 @@ test = [ "touch", "translations", "ui", - "ui_jpeg_decoder", + "ui_jpeg", "ui_blurring", "ui_image_buffer", "ui_overlay", diff --git a/core/embed/rust/build.rs b/core/embed/rust/build.rs index 10cd01de9d..b41cf8b835 100644 --- a/core/embed/rust/build.rs +++ 
b/core/embed/rust/build.rs @@ -53,6 +53,7 @@ const DEFAULT_BINDGEN_MACROS_COMMON: &[&str] = &[ "-DUSE_HAPTIC", "-DUSE_RGB_LED", "-DUSE_BLE", + "-DUSE_HW_JPEG_DECODER", ]; fn add_bindgen_macros<'a>( @@ -395,7 +396,18 @@ fn generate_trezorhal_bindings() { // haptic .allowlist_type("haptic_effect_t") .allowlist_function("haptic_play") - .allowlist_function("haptic_play_custom"); + .allowlist_function("haptic_play_custom") + // jpegdec + .allowlist_var("JPEGDEC_RGBA8888_BUFFER_SIZE") + .allowlist_type("jpegdec_state_t") + .allowlist_type("jpegdec_image_t") + .allowlist_type("jpegdec_image_format_t") + .allowlist_type("jpegdec_slice_t") + .allowlist_function("jpegdec_open") + .allowlist_function("jpegdec_close") + .allowlist_function("jpegdec_process") + .allowlist_function("jpegdec_get_info") + .allowlist_function("jpegdec_get_slice_rgba8888"); // Write the bindings to a file in the OUR_DIR. bindings diff --git a/core/embed/rust/src/trezorhal/jpegdec.rs b/core/embed/rust/src/trezorhal/jpegdec.rs new file mode 100644 index 0000000000..ac5369fca0 --- /dev/null +++ b/core/embed/rust/src/trezorhal/jpegdec.rs @@ -0,0 +1,204 @@ +use super::ffi; + +use crate::ui::{ + geometry::{Offset, Point, Rect}, + shape::{Bitmap, BitmapFormat, BitmapView}, +}; + +use crate::io::BinaryData; +use num_traits::FromPrimitive; + +pub const RGBA8888_BUFFER_SIZE: usize = ffi::JPEGDEC_RGBA8888_BUFFER_SIZE as _; + +#[derive(PartialEq, Debug, Eq, FromPrimitive, Clone, Copy)] +enum JpegDecState { + NeedData = ffi::jpegdec_state_t_JPEGDEC_STATE_NEED_DATA as _, + InfoReady = ffi::jpegdec_state_t_JPEGDEC_STATE_INFO_READY as _, + SliceReady = ffi::jpegdec_state_t_JPEGDEC_STATE_SLICE_READY as _, + Finished = ffi::jpegdec_state_t_JPEGDEC_STATE_FINISHED as _, + Error = ffi::jpegdec_state_t_JPEGDEC_STATE_ERROR as _, +} + +#[derive(PartialEq, Debug, Eq, FromPrimitive, Clone, Copy)] +pub enum JpegDecImageFormat { + GrayScale = ffi::jpegdec_image_format_t_JPEGDEC_IMAGE_GRAYSCALE as _, + YCBCR420 = 
ffi::jpegdec_image_format_t_JPEGDEC_IMAGE_YCBCR420 as _, + YCBCR422 = ffi::jpegdec_image_format_t_JPEGDEC_IMAGE_YCBCR422 as _, + YCBCR444 = ffi::jpegdec_image_format_t_JPEGDEC_IMAGE_YCBCR444 as _, +} + +pub struct JpegDecImage { + pub width: i16, + pub height: i16, + pub format: JpegDecImageFormat, +} + +pub struct JpegDecoder<'a> { + jpeg: BinaryData<'a>, + jpeg_pos: usize, + buff: [u8; 1024], + buff_pos: usize, + buff_len: usize, +} + +impl Drop for JpegDecoder<'_> { + fn drop(&mut self) { + // SAFETY: + // We cannot have more than one instance of JpegDecoder at a time. + // The `jpegdec_close` is called in pair with `jpegdec_open`. + unsafe { + ffi::jpegdec_close(); + } + } +} + +impl<'a> JpegDecoder<'a> { + /// Creates a new JPEG decoder instance from the given JPEG data. + /// + /// The function reads the JPEG header and returns. + pub fn new(jpeg: BinaryData<'a>) -> Result { + // SAFETY: + // `jpegdec_open()` is always called in pair with `jpegdec_close()`. + if !unsafe { ffi::jpegdec_open() } { + // Already open + return Err(()); + } + + let mut dec = Self { + jpeg, + jpeg_pos: 0, + buff: [0; 1024], + buff_pos: 0, + buff_len: 0, + }; + + loop { + match dec.read_input() { + JpegDecState::InfoReady => break, + JpegDecState::NeedData => {} + _ => return Err(()), + }; + } + + Ok(dec) + } + + // Returns the image format and dimensions. + pub fn image(&self) -> Result { + let mut info = ffi::jpegdec_image_t { + width: 0, + height: 0, + format: 0, + }; + + // SAFETY: + // - `info` is a valid pointer to a mutable `jpegdec_image_t` struct. + if unsafe { ffi::jpegdec_get_info(&mut info) } { + Ok(JpegDecImage { + width: info.width, + height: info.height, + format: unwrap!(JpegDecImageFormat::from_u8(info.format as _)), + }) + } else { + Err(()) + } + } + + /// Decodes the JPEG image and calls the output function for each slice. + /// Requires a temporary buffer of size `RGBA8888_BUFFER_SIZE`. 
+ /// The output function should return `true` to continue decoding or `false` + /// to stop. Returns `Ok(())` if the decoding was successful or + /// `Err(())` if an error occurred. + pub fn decode( + &mut self, + buff: &mut [u8], + output: &mut dyn FnMut(Rect, BitmapView) -> bool, + ) -> Result<(), ()> { + loop { + match self.read_input() { + JpegDecState::SliceReady => { + if !self.write_output(buff, output) { + break; + }; + } + JpegDecState::Finished => break, + JpegDecState::NeedData => {} + _ => return Err(()), + }; + } + Ok(()) + } + + fn read_input(&mut self) -> JpegDecState { + if self.buff_pos == self.buff_len { + self.buff_len = self.jpeg.read(self.jpeg_pos, &mut self.buff); + self.buff_pos = 0; + self.jpeg_pos += self.buff_len; + } + + let mut inp = ffi::jpegdec_input_t { + data: self.buff.as_ptr(), + size: self.buff_len, + offset: self.buff_pos, + last_chunk: self.buff_len < self.buff.len(), + }; + + // SAFETY: + // - `inp.data` points to the mutable buffer we own + // - `inp.size` is valid buffer size + // - `inp.offset` is a valid offset in the buffer + // - jpegdec_process() doesn't retain the pointers to the data for later use + let state_u8 = unsafe { ffi::jpegdec_process(&mut inp) }; + self.buff_pos = inp.offset; + + unwrap!(JpegDecState::from_u8(state_u8 as _)) + } + + fn write_output( + &self, + buff: &mut [u8], + output: &mut dyn FnMut(Rect, BitmapView) -> bool, + ) -> bool { + // SAFETY: + // - after aligning the buffer to u32, we check the + // length of the buffer to be at least `RGBA8888_BUFFER_SIZE` + let rgba_u32 = unsafe { buff.align_to_mut::().1 }; + assert!(rgba_u32.len() * 4 >= RGBA8888_BUFFER_SIZE); + + let mut slice = ffi::jpegdec_slice_t { + x: 0, + y: 0, + width: 0, + height: 0, + }; + + // SAFETY: + // - `rgba_u32` is a valid pointer to a mutable buffer of u32 of length at + // least `RGBA8888_BUFFER_SIZE` + // - `slice` is a valid pointer to a mutable `jpegdec_slice_t` + // - `jpegdec_get_slice_rgba8888` doesn't retain the 
pointers to the data for + // later use + unsafe { ffi::jpegdec_get_slice_rgba8888(rgba_u32.as_mut_ptr(), &mut slice) }; + + let r = Rect::from_top_left_and_size( + Point::new(slice.x, slice.y), + Offset::new(slice.width, slice.height), + ); + + // SAFETY: + // - reinterpreting &[u32] to &[u8] is safe + let rgba_u8 = unsafe { buff.align_to::().1 }; + + let bitmap = unwrap!(Bitmap::new( + BitmapFormat::RGBA8888, + None, + r.size(), + None, + rgba_u8 + )); + + let view = BitmapView::new(&bitmap); + + output(r, view) + } +} diff --git a/core/embed/rust/src/trezorhal/mod.rs b/core/embed/rust/src/trezorhal/mod.rs index 51632ea62e..e56d51b2c5 100644 --- a/core/embed/rust/src/trezorhal/mod.rs +++ b/core/embed/rust/src/trezorhal/mod.rs @@ -13,6 +13,9 @@ mod ffi; pub mod haptic; pub mod io; + +#[cfg(feature = "hw_jpeg_decoder")] +pub mod jpegdec; pub mod model; pub mod random; #[cfg(feature = "rgb_led")] diff --git a/core/embed/rust/src/ui/component/mod.rs b/core/embed/rust/src/ui/component/mod.rs index b6c2b8ad4a..e15948fdf6 100644 --- a/core/embed/rust/src/ui/component/mod.rs +++ b/core/embed/rust/src/ui/component/mod.rs @@ -4,12 +4,16 @@ pub mod bar; pub mod base; pub mod border; pub mod button_request; -#[cfg(all(feature = "jpeg", feature = "ui_image_buffer", feature = "micropython"))] +#[cfg(all( + feature = "ui_jpeg", + feature = "ui_image_buffer", + feature = "micropython" +))] pub mod cached_jpeg; pub mod connect; pub mod empty; pub mod image; -#[cfg(all(feature = "jpeg", feature = "micropython"))] +#[cfg(all(feature = "ui_jpeg", feature = "micropython"))] pub mod jpeg; pub mod label; pub mod map; @@ -30,10 +34,14 @@ pub use bar::Bar; pub use base::{Child, Component, ComponentExt, Event, EventCtx, FlowMsg, Never, Timer}; pub use border::Border; pub use button_request::{ButtonRequestExt, SendButtonRequest}; -#[cfg(all(feature = "jpeg", feature = "ui_image_buffer", feature = "micropython"))] +#[cfg(all( + feature = "ui_jpeg", + feature = "ui_image_buffer", + feature = 
"micropython" +))] pub use cached_jpeg::CachedJpeg; pub use empty::Empty; -#[cfg(all(feature = "jpeg", feature = "micropython"))] +#[cfg(all(feature = "ui_jpeg", feature = "micropython"))] pub use jpeg::Jpeg; pub use label::Label; pub use map::{MsgMap, PageMap}; diff --git a/core/embed/rust/src/ui/shape/cache/drawing_cache.rs b/core/embed/rust/src/ui/shape/cache/drawing_cache.rs index 91158e81b3..516f3d7d67 100644 --- a/core/embed/rust/src/ui/shape/cache/drawing_cache.rs +++ b/core/embed/rust/src/ui/shape/cache/drawing_cache.rs @@ -3,9 +3,12 @@ use super::zlib_cache::ZlibCache; #[cfg(feature = "ui_blurring")] use super::blur_cache::BlurCache; -#[cfg(feature = "ui_jpeg_decoder")] +#[cfg(all(feature = "ui_jpeg", not(feature = "hw_jpeg_decoder")))] use super::jpeg_cache::JpegCache; +#[cfg(feature = "hw_jpeg_decoder")] +use crate::trezorhal::jpegdec; + use core::cell::{RefCell, RefMut}; use without_alloc::alloc::LocalAllocLeakExt; @@ -19,10 +22,28 @@ const ZLIB_CACHE_SLOTS: usize = 3; #[cfg(not(feature = "framebuffer"))] const RENDER_BUFF_SIZE: usize = (240 * 2 * 16) + ALIGN_PAD; -#[cfg(feature = "ui_overlay")] -const IMAGE_BUFF_SIZE: usize = 240 * 240 + ALIGN_PAD; -#[cfg(not(feature = "ui_overlay"))] -const IMAGE_BUFF_SIZE: usize = 2048 + ALIGN_PAD; +const fn const_max(a: usize, b: usize) -> usize { + if a > b { + a + } else { + b + } +} +const IMAGE_BUFF_SIZE: usize = { + const DEFAULT: usize = 2048; + + #[cfg(feature = "ui_overlay")] + const OVERLAY: usize = 240 * 240; + #[cfg(not(feature = "ui_overlay"))] + const OVERLAY: usize = 0; + + #[cfg(feature = "hw_jpeg_decoder")] + const JPEG: usize = jpegdec::RGBA8888_BUFFER_SIZE; + #[cfg(not(feature = "hw_jpeg_decoder"))] + const JPEG: usize = 0; + + const_max(DEFAULT, const_max(JPEG, OVERLAY)) + ALIGN_PAD +}; pub type ImageBuff = [u8; IMAGE_BUFF_SIZE]; @@ -38,7 +59,7 @@ pub struct DrawingCache<'a> { image_buff: &'a RefCell, zlib_cache: RefCell>, - #[cfg(feature = "ui_jpeg_decoder")] + #[cfg(all(feature = "ui_jpeg", 
not(feature = "hw_jpeg_decoder")))] jpeg_cache: RefCell>, #[cfg(feature = "ui_blurring")] @@ -67,7 +88,7 @@ impl<'a> DrawingCache<'a> { ZlibCache::new(bump_a, ZLIB_CACHE_SLOTS), "ZLIB cache alloc" )), - #[cfg(feature = "ui_jpeg_decoder")] + #[cfg(all(feature = "ui_jpeg", not(feature = "hw_jpeg_decoder")))] jpeg_cache: RefCell::new(unwrap!(JpegCache::new(bump_a), "JPEG cache alloc")), #[cfg(feature = "ui_blurring")] blur_cache: RefCell::new(unwrap!(BlurCache::new(bump_a), "Blur cache alloc")), @@ -83,7 +104,7 @@ impl<'a> DrawingCache<'a> { } /// Returns an object for decompression of JPEG images - #[cfg(feature = "ui_jpeg_decoder")] + #[cfg(all(feature = "ui_jpeg", not(feature = "hw_jpeg_decoder")))] pub fn jpeg(&self) -> RefMut> { self.jpeg_cache.borrow_mut() } @@ -111,7 +132,7 @@ impl<'a> DrawingCache<'a> { size += ZlibCache::get_bump_size(ZLIB_CACHE_SLOTS); - #[cfg(feature = "ui_jpeg_decoder")] + #[cfg(all(feature = "ui_jpeg", not(feature = "hw_jpeg_decoder")))] { size += JpegCache::get_bump_size(); } diff --git a/core/embed/rust/src/ui/shape/cache/jpeg_cache.rs b/core/embed/rust/src/ui/shape/cache/jpeg_cache.rs index 22c08645c6..0c24151d9d 100644 --- a/core/embed/rust/src/ui/shape/cache/jpeg_cache.rs +++ b/core/embed/rust/src/ui/shape/cache/jpeg_cache.rs @@ -196,7 +196,7 @@ impl<'a> JpegCache<'a> { offset_y += row_canvas.height() - offset_y % row_canvas.height(); } } else { - // Create a new row for cahing decoded JPEG data + // Create a new row for caching decoded JPEG data // Now there's nobody else holding any reference to canvas_buff so // we can get a mutable reference and pass it to a new instance // of Rgb565Canvas diff --git a/core/embed/rust/src/ui/shape/cache/mod.rs b/core/embed/rust/src/ui/shape/cache/mod.rs index 0717577ee7..fb4a64c8bc 100644 --- a/core/embed/rust/src/ui/shape/cache/mod.rs +++ b/core/embed/rust/src/ui/shape/cache/mod.rs @@ -1,7 +1,7 @@ pub mod blur_cache; pub mod drawing_cache; -#[cfg(feature = "ui_jpeg_decoder")] +#[cfg(all(feature 
= "ui_jpeg", not(feature = "hw_jpeg_decoder")))] pub mod jpeg_cache; pub mod zlib_cache; diff --git a/core/embed/rust/src/ui/shape/jpeg.rs b/core/embed/rust/src/ui/shape/jpeg.rs index 63f6c13fdb..f72d645800 100644 --- a/core/embed/rust/src/ui/shape/jpeg.rs +++ b/core/embed/rust/src/ui/shape/jpeg.rs @@ -6,7 +6,13 @@ use crate::{ }, }; -use super::{Bitmap, BitmapFormat, BitmapView, Canvas, DrawingCache, Renderer, Shape, ShapeClone}; +use super::{Canvas, DrawingCache, Renderer, Shape, ShapeClone}; + +#[cfg(not(feature = "hw_jpeg_decoder"))] +use super::{Bitmap, BitmapFormat, BitmapView}; + +#[cfg(feature = "hw_jpeg_decoder")] +use crate::{trezorhal::jpegdec::JpegDecoder, ui::display::Color}; use without_alloc::alloc::LocalAllocLeakExt; @@ -24,11 +30,12 @@ pub struct JpegImage<'a> { blur_radius: usize, /// Dimming of blurred image in range of 0..255 (default 255) dim: u8, + /// Final size calculated from JPEG headers + size: Offset, /// Set if blurring is pending /// (used only during image drawing). 
+ #[cfg(not(feature = "hw_jpeg_decoder"))] blur_tag: Option, - /// Final size calculated from TOIF data - size: Offset, } impl<'a> JpegImage<'a> { @@ -36,12 +43,14 @@ impl<'a> JpegImage<'a> { JpegImage { pos, align: Alignment2D::TOP_LEFT, - scale: 0, - dim: 255, - blur_radius: 0, jpeg, - blur_tag: None, + scale: 0, + blur_radius: 0, + dim: 255, + size: Offset::zero(), + #[cfg(not(feature = "hw_jpeg_decoder"))] + blur_tag: None, } } @@ -88,38 +97,51 @@ impl<'a> Shape<'a> for JpegImage<'a> { } fn cleanup(&mut self, _cache: &DrawingCache<'a>) { - self.blur_tag = None; + #[cfg(not(feature = "hw_jpeg_decoder"))] + { + self.blur_tag = None; + } } - /* - // Faster implementation suitable for DirectRenderer without blurring support - // (but is terribly slow on ProgressiveRenderer if slices are not aligned - // to JPEG MCUs ) - fn draw(&mut self, canvas: &mut dyn RgbCanvasEx, cache: &DrawingCache<'a>) { - let bounds = self.bounds(cache); + #[cfg(feature = "hw_jpeg_decoder")] + fn draw(&mut self, canvas: &mut dyn Canvas, cache: &DrawingCache<'a>) { + let bounds = self.bounds(); let clip = canvas.viewport().relative_clip(bounds).clip; - // translate clip to JPEG relative coordinates + let vp = canvas.set_clip(clip); + + // Translate clip to JPEG relative coordinates let clip = clip.translate(-canvas.viewport().origin); let clip = clip.translate((-bounds.top_left()).into()); - unwrap!( - cache.jpeg().decompress_mcu( - self.jpeg, - self.scale, - clip.top_left(), - &mut |mcu_r, mcu_bitmap| { - // Draw single MCU - canvas.draw_bitmap(mcu_r.translate(bounds.top_left().into()), mcu_bitmap); - // Return true if we are not done yet - mcu_r.x1 < clip.x1 || mcu_r.y1 < clip.y1 - } - ), - "Invalid JPEG" - ); - }*/ + // Get temporary buffer for image decoding + let buff = &mut unwrap!(cache.image_buff(), "No image buffer"); + + let mut jpegdec = unwrap!(JpegDecoder::new(self.jpeg)); + let _ = jpegdec.decode(&mut buff[..], &mut |slice_r, slice| { + // Draw single slice + 
canvas.draw_bitmap(slice_r.translate(bounds.top_left().into()), slice); + // Return true if we are not done yet + slice_r.x1 < clip.x1 || slice_r.y1 < clip.y1 + }); + + if self.dim < 255 { + // Draw dimmed overlay. + // This solution is suboptimal and might be replaced by + // using faster alpha blending in the hardware. + canvas.fill_rect(clip, Color::black(), 255 - self.dim); + } + + if self.blur_radius > 0 { + // Blur the image + canvas.blur_rect(clip, self.blur_radius, cache); + } + + canvas.set_viewport(vp); + } // This is a little bit slower implementation suitable for ProgressiveRenderer + #[cfg(not(feature = "hw_jpeg_decoder"))] fn draw(&mut self, canvas: &mut dyn Canvas, cache: &DrawingCache<'a>) { let bounds = self.bounds(); let clip = canvas.viewport().relative_clip(bounds).clip; diff --git a/core/embed/rust/src/ui/shape/mod.rs b/core/embed/rust/src/ui/shape/mod.rs index 16879a0f48..9813a1fe05 100644 --- a/core/embed/rust/src/ui/shape/mod.rs +++ b/core/embed/rust/src/ui/shape/mod.rs @@ -8,7 +8,7 @@ mod canvas; mod circle; mod corner_highlight; mod display; -#[cfg(feature = "ui_jpeg_decoder")] +#[cfg(feature = "ui_jpeg")] mod jpeg; #[cfg(not(feature = "framebuffer"))] mod progressive_render; @@ -31,7 +31,7 @@ pub use canvas::{ pub use circle::Circle; pub use corner_highlight::CornerHighlight; pub use display::{render_on_canvas, render_on_display, unlock_bumps_on_failure, ConcreteRenderer}; -#[cfg(feature = "ui_jpeg_decoder")] +#[cfg(feature = "ui_jpeg")] pub use jpeg::JpegImage; #[cfg(not(feature = "framebuffer"))] pub use progressive_render::ProgressiveRenderer; diff --git a/core/embed/rust/trezorhal.h b/core/embed/rust/trezorhal.h index 6e00aac760..7a5200249c 100644 --- a/core/embed/rust/trezorhal.h +++ b/core/embed/rust/trezorhal.h @@ -13,6 +13,10 @@ #include #include "storage.h" +#ifdef USE_HW_JPEG_DECODER +#include +#endif + #ifdef USE_BLE #include #endif diff --git a/core/embed/sys/syscall/stm32/syscall_dispatch.c 
b/core/embed/sys/syscall/stm32/syscall_dispatch.c index b8f9445055..7843ea0e1a 100644 --- a/core/embed/sys/syscall/stm32/syscall_dispatch.c +++ b/core/embed/sys/syscall/stm32/syscall_dispatch.c @@ -51,6 +51,10 @@ #include #endif +#ifdef USE_HW_JPEG_DECODER +#include +#endif + #ifdef USE_OPTIGA #include #endif @@ -724,6 +728,31 @@ __attribute((no_stack_protector)) void syscall_handler(uint32_t *args, } break; #endif +#ifdef USE_HW_JPEG_DECODER + case SYSCALL_JPEGDEC_OPEN: { + args[0] = jpegdec_open(); + } break; + + case SYSCALL_JPEGDEC_CLOSE: { + jpegdec_close(); + } break; + + case SYSCALL_JPEGDEC_PROCESS: { + args[0] = jpegdec_process__verified((jpegdec_input_t *)args[0]); + } break; + + case SYSCALL_JPEGDEC_GET_INFO: { + args[0] = jpegdec_get_info__verified((jpegdec_image_t *)args[0]); + break; + } + + case SYSCALL_JPEGDEC_GET_SLICE_RGBA8888: { + args[0] = jpegdec_get_slice_rgba8888__verified( + (void *)args[0], (jpegdec_slice_t *)args[1]); + break; + } +#endif // USE_HW_JPEG_DECODER + default: system_exit_fatal("Invalid syscall", __FILE__, __LINE__); break; diff --git a/core/embed/sys/syscall/stm32/syscall_numbers.h b/core/embed/sys/syscall/stm32/syscall_numbers.h index 2edef3d3f3..11ad1a5748 100644 --- a/core/embed/sys/syscall/stm32/syscall_numbers.h +++ b/core/embed/sys/syscall/stm32/syscall_numbers.h @@ -17,8 +17,7 @@ * along with this program. If not, see . 
*/ -#ifndef SYSCALL_NUMBERS_H -#define SYSCALL_NUMBERS_H +#pragma once // Syscall identifiers typedef enum { @@ -149,6 +148,10 @@ typedef enum { SYSCALL_POWERCTL_SUSPEND, -} syscall_number_t; + SYSCALL_JPEGDEC_OPEN, + SYSCALL_JPEGDEC_CLOSE, + SYSCALL_JPEGDEC_PROCESS, + SYSCALL_JPEGDEC_GET_INFO, + SYSCALL_JPEGDEC_GET_SLICE_RGBA8888, -#endif // SYSCALL_NUMBERS_H +} syscall_number_t; diff --git a/core/embed/sys/syscall/stm32/syscall_stubs.c b/core/embed/sys/syscall/stm32/syscall_stubs.c index f7048cacf6..1a02d3881c 100644 --- a/core/embed/sys/syscall/stm32/syscall_stubs.c +++ b/core/embed/sys/syscall/stm32/syscall_stubs.c @@ -687,4 +687,40 @@ void powerctl_suspend(void) { syscall_invoke0(SYSCALL_POWERCTL_SUSPEND); } #endif // USE_POWERCTL +// ============================================================================= +// jpegdec.h +// ============================================================================= + +#ifdef USE_HW_JPEG_DECODER + +#include + +bool jpegdec_open(void) { return (bool)syscall_invoke0(SYSCALL_JPEGDEC_OPEN); } + +void jpegdec_close(void) { + { + // Temporary hack to fix the problem with dual DMA2D driver in + // user/kernel space - will be removed in the future with DMA2D syscalls + extern void dma2d_invalidate_clut(); + dma2d_invalidate_clut(); + } + syscall_invoke0(SYSCALL_JPEGDEC_CLOSE); +} + +jpegdec_state_t jpegdec_process(jpegdec_input_t *input) { + return (jpegdec_state_t)syscall_invoke1((uint32_t)input, + SYSCALL_JPEGDEC_PROCESS); +} + +bool jpegdec_get_info(jpegdec_image_t *info) { + return (bool)syscall_invoke1((uint32_t)info, SYSCALL_JPEGDEC_GET_INFO); +} + +bool jpegdec_get_slice_rgba8888(uint32_t *rgba8888, jpegdec_slice_t *slice) { + return (bool)syscall_invoke2((uint32_t)rgba8888, (uint32_t)slice, + SYSCALL_JPEGDEC_GET_SLICE_RGBA8888); +} + +#endif // USE_HW_JPEG_DECODER + #endif // KERNEL_MODE diff --git a/core/embed/sys/syscall/stm32/syscall_verifiers.c b/core/embed/sys/syscall/stm32/syscall_verifiers.c index 
925acde30b..3909e2cab0 100644 --- a/core/embed/sys/syscall/stm32/syscall_verifiers.c +++ b/core/embed/sys/syscall/stm32/syscall_verifiers.c @@ -781,4 +781,48 @@ access_violation: } #endif +// --------------------------------------------------------------------- + +#ifdef USE_HW_JPEG_DECODER + +jpegdec_state_t jpegdec_process__verified(jpegdec_input_t *input) { + if (!probe_write_access(input, sizeof(*input))) { + goto access_violation; + } + + return jpegdec_process(input); + +access_violation: + return JPEGDEC_STATE_ERROR; +} + +bool jpegdec_get_info__verified(jpegdec_image_t *image) { + if (!probe_write_access(image, sizeof(*image))) { + goto access_violation; + } + + return jpegdec_get_info(image); + +access_violation: + return false; +} + +bool jpegdec_get_slice_rgba8888__verified(void *rgba8888, + jpegdec_slice_t *slice) { + if (!probe_write_access(rgba8888, JPEGDEC_RGBA8888_BUFFER_SIZE)) { + goto access_violation; + } + + if (!probe_write_access(slice, sizeof(*slice))) { + goto access_violation; + } + + return jpegdec_get_slice_rgba8888(rgba8888, slice); + +access_violation: + return false; +} + +#endif // USE_HW_JPEG_DECODER + #endif // SYSCALL_DISPATCH diff --git a/core/embed/sys/syscall/stm32/syscall_verifiers.h b/core/embed/sys/syscall/stm32/syscall_verifiers.h index b1b2d9287a..7b9b31e71e 100644 --- a/core/embed/sys/syscall/stm32/syscall_verifiers.h +++ b/core/embed/sys/syscall/stm32/syscall_verifiers.h @@ -17,8 +17,7 @@ * along with this program. If not, see . 
*/ -#ifndef TREZORHAL_SYSCALL_VERIFIERS_H -#define TREZORHAL_SYSCALL_VERIFIERS_H +#pragma once #ifdef SYSCALL_DISPATCH @@ -203,6 +202,18 @@ secbool ble_read__verified(uint8_t *data, size_t len); #endif -#endif // SYSCALL_DISPATCH +// --------------------------------------------------------------------- +#ifdef USE_HW_JPEG_DECODER -#endif // TREZORHAL_SYSCALL_VERIFIERS_H +#include + +jpegdec_state_t jpegdec_process__verified(jpegdec_input_t *input); + +bool jpegdec_get_info__verified(jpegdec_image_t *image); + +bool jpegdec_get_slice_rgba8888__verified(void *rgba8888, + jpegdec_slice_t *slice); + +#endif // USE_HW_JPEG_DECODER + +#endif // SYSCALL_DISPATCH diff --git a/core/site_scons/models/T3W1/trezor_t3w1_revA.py b/core/site_scons/models/T3W1/trezor_t3w1_revA.py index fdce74d845..48e7da96f5 100644 --- a/core/site_scons/models/T3W1/trezor_t3w1_revA.py +++ b/core/site_scons/models/T3W1/trezor_t3w1_revA.py @@ -148,6 +148,12 @@ def configure( features_available.append("dma2d") sources += ["embed/gfx/bitblt/stm32/dma2d_bitblt.c"] + defines += ["USE_HW_JPEG_DECODER"] + features_available.append("hw_jpeg_decoder") + sources += [ + "embed/gfx/jpegdec/stm32u5/jpegdec.c", + ] + defines += [ ("USE_HASH_PROCESSOR", "1"), ("USE_STORAGE_HWKEY", "1"), diff --git a/core/site_scons/models/T3W1/trezor_t3w1_revA0.py b/core/site_scons/models/T3W1/trezor_t3w1_revA0.py index 23c81d2af3..6330fa3243 100644 --- a/core/site_scons/models/T3W1/trezor_t3w1_revA0.py +++ b/core/site_scons/models/T3W1/trezor_t3w1_revA0.py @@ -139,12 +139,16 @@ def configure( features_available.append("framebuffer") features_available.append("display_rgb565") - defines += [ - "USE_DMA2D", - ] + defines += ["USE_DMA2D"] features_available.append("dma2d") sources += ["embed/gfx/bitblt/stm32/dma2d_bitblt.c"] + defines += ["USE_HW_JPEG_DECODER"] + features_available.append("hw_jpeg_decoder") + sources += [ + "embed/gfx/jpegdec/stm32u5/jpegdec.c", + ] + defines += [ ("USE_HASH_PROCESSOR", "1"), ("USE_STORAGE_HWKEY", 
"1"), diff --git a/core/site_scons/models/T3W1/trezor_t3w1_revB.py b/core/site_scons/models/T3W1/trezor_t3w1_revB.py index 2a765279fe..6d8b3e62f2 100644 --- a/core/site_scons/models/T3W1/trezor_t3w1_revB.py +++ b/core/site_scons/models/T3W1/trezor_t3w1_revB.py @@ -148,6 +148,12 @@ def configure( features_available.append("dma2d") sources += ["embed/gfx/bitblt/stm32/dma2d_bitblt.c"] + defines += ["USE_HW_JPEG_DECODER"] + features_available.append("hw_jpeg_decoder") + sources += [ + "embed/gfx/jpegdec/stm32u5/jpegdec.c", + ] + defines += [ ("USE_HASH_PROCESSOR", "1"), ("USE_STORAGE_HWKEY", "1"), diff --git a/core/site_scons/ui/ui_bolt.py b/core/site_scons/ui/ui_bolt.py index be726c0337..5869ec9b87 100644 --- a/core/site_scons/ui/ui_bolt.py +++ b/core/site_scons/ui/ui_bolt.py @@ -22,7 +22,7 @@ def init_ui( add_font("BOLD", "Font_Roboto_Bold_20", defines, sources) if stage == "firmware": rust_features.append("ui_blurring") - rust_features.append("ui_jpeg_decoder") + rust_features.append("ui_jpeg") def get_ui_layout() -> str: diff --git a/core/site_scons/ui/ui_delizia.py b/core/site_scons/ui/ui_delizia.py index 128ccd882f..ba14aa127c 100644 --- a/core/site_scons/ui/ui_delizia.py +++ b/core/site_scons/ui/ui_delizia.py @@ -22,7 +22,7 @@ def init_ui( add_font("BOLD", "Font_TTSatoshi_DemiBold_21", defines, sources) if stage == "firmware": rust_features.append("ui_blurring") - rust_features.append("ui_jpeg_decoder") + rust_features.append("ui_jpeg") rust_features.append("ui_image_buffer") rust_features.append("ui_overlay")