1
0
mirror of https://github.com/trezor/trezor-firmware.git synced 2025-01-10 15:30:55 +00:00

refactor(core/rust): more explicit buffer semantics, StrBuffer::alloc

[no changelog]
This commit is contained in:
matejcik 2022-09-05 13:10:14 +02:00 committed by matejcik
parent 5a2e831214
commit 7ba7eff118
3 changed files with 135 additions and 180 deletions

View File

@ -1,152 +1,65 @@
use core::{
convert::TryFrom,
ops::{Deref, DerefMut},
ptr, slice, str,
};
use core::{convert::TryFrom, ops::Deref, ptr, slice, str};
use crate::{error::Error, micropython::obj::Obj};
use super::ffi;
/// Represents an immutable slice of bytes stored on the MicroPython heap and
/// owned by values that obey the `MP_BUFFER_READ` buffer protocol, such as
/// `bytes`, `str`, `bytearray` or `memoryview`.
/// Represents an immutable UTF-8 string managed by MicroPython GC.
/// This either means static data, or a valid GC object.
///
/// # Safety
///
/// In most cases, it is unsound to store `Buffer` values in a GC-unreachable
/// location, such as static data. It is also unsound to let the contents be
/// modified while a reference to them is being held.
pub struct Buffer {
/// We assume that MicroPython is handling strings according to Python
/// semantics, i.e., that string data is immutable.
/// Furthermore, we assume that string data is always either static or a GC head
/// pointer, i.e., we can never obtain a pointer into the middle of a GC string.
///
/// Given the above assumptions about MicroPython strings, working with
/// StrBuffers in Rust is safe.
pub struct StrBuffer {
ptr: *const u8,
len: usize,
}
impl Buffer {
    /// Creates a `Buffer` that refers to a zero-length static byte string.
    pub fn empty() -> Self {
        let nothing: &'static [u8; 0] = b"";
        Self::from(nothing)
    }
}
impl TryFrom<Obj> for Buffer {
    type Error = Error;

    /// Requests read access (`MP_BUFFER_READ`) to the buffer of `obj` and
    /// records the reported pointer and length.
    ///
    /// # Errors
    ///
    /// Propagates whatever error `get_buffer_info` returns.
    fn try_from(obj: Obj) -> Result<Self, Self::Error> {
        get_buffer_info(obj, ffi::MP_BUFFER_READ).map(|info| Self {
            ptr: info.buf as _,
            len: info.len as _,
        })
    }
}
impl Default for Buffer {
fn default() -> Self {
Self::empty()
}
}
impl Deref for Buffer {
    type Target = [u8];

    /// Dereferences to the byte slice exposed by the `AsRef<[u8]>` impl.
    fn deref(&self) -> &Self::Target {
        <Self as AsRef<[u8]>>::as_ref(self)
    }
}
impl AsRef<[u8]> for Buffer {
    /// Views the stored pointer/length pair as a byte slice via
    /// `buffer_as_ref`.
    fn as_ref(&self) -> &[u8] {
        let (start, count) = (self.ptr, self.len);
        buffer_as_ref(start, count)
    }
}
impl From<&'static [u8]> for Buffer {
fn from(val: &'static [u8]) -> Self {
Buffer {
ptr: val.as_ptr(),
len: val.len(),
}
}
}
impl<const N: usize> From<&'static [u8; N]> for Buffer {
fn from(val: &'static [u8; N]) -> Self {
Buffer {
ptr: val.as_ptr(),
len: val.len(),
}
}
}
/// Represents a mutable slice of bytes stored on the MicroPython heap and
/// owned by values that obey the `MP_BUFFER_WRITE` buffer protocol, such as
/// `bytearray` or `memoryview`.
///
/// # Safety
///
/// In most cases, it is unsound to store `BufferMut` values in a
/// GC-unreachable location, such as static data. It is also unsound to let
/// the contents be modified by anyone else while a reference to them is
/// being held.
pub struct BufferMut {
// Start of the writable region; may be null when `len` is 0.
ptr: *mut u8,
// Length of the region in bytes.
len: usize,
}
impl TryFrom<Obj> for BufferMut {
    type Error = Error;

    /// Requests write access (`MP_BUFFER_WRITE`) to the buffer of `obj` and
    /// records the reported pointer and length.
    ///
    /// # Errors
    ///
    /// Propagates whatever error `get_buffer_info` returns.
    fn try_from(obj: Obj) -> Result<Self, Self::Error> {
        get_buffer_info(obj, ffi::MP_BUFFER_WRITE).map(|info| Self {
            ptr: info.buf as _,
            len: info.len as _,
        })
    }
}
impl Deref for BufferMut {
    type Target = [u8];

    /// Dereferences to the byte slice exposed by the `AsRef<[u8]>` impl.
    fn deref(&self) -> &Self::Target {
        <Self as AsRef<[u8]>>::as_ref(self)
    }
}
impl DerefMut for BufferMut {
    /// Mutably dereferences to the byte slice exposed by the `AsMut<[u8]>`
    /// impl.
    fn deref_mut(&mut self) -> &mut Self::Target {
        <Self as AsMut<[u8]>>::as_mut(self)
    }
}
impl AsRef<[u8]> for BufferMut {
    /// Views the stored pointer/length pair as a shared byte slice via
    /// `buffer_as_ref`.
    fn as_ref(&self) -> &[u8] {
        let (start, count) = (self.ptr, self.len);
        buffer_as_ref(start, count)
    }
}
impl AsMut<[u8]> for BufferMut {
    /// Views the stored pointer/length pair as a mutable byte slice via
    /// `buffer_as_mut`.
    fn as_mut(&mut self) -> &mut [u8] {
        let (start, count) = (self.ptr, self.len);
        buffer_as_mut(start, count)
    }
}
/// Represents an immutable UTF-8 string stored on the MicroPython heap and
/// owned by a `str` object.
///
/// This is a thin wrapper around a `Buffer`; the derived `Default` wraps
/// `Buffer::default()`.
///
/// # Safety
///
/// In most cases, it is unsound to store `StrBuffer` values in a GC-unreachable
/// location, such as static data. It is also unsound to let the contents be
/// modified while a reference to them is being held.
#[derive(Default)]
pub struct StrBuffer(Buffer);
impl StrBuffer {
    /// Creates a `StrBuffer` referring to the empty static string.
    pub fn empty() -> Self {
        Self::from("")
    }

    /// Copies `val` into a fresh GC allocation and returns a `StrBuffer`
    /// pointing at the copy.
    ///
    /// The allocation is one byte longer than `val` and ends with a zero
    /// byte, so the data can also be handed to C code expecting ASCIIZ. The
    /// terminator is not part of the Rust-visible contents.
    ///
    /// # Errors
    ///
    /// Returns `Error::AllocationFailed` if `gc_alloc` returns a null pointer.
    pub fn alloc(val: &str) -> Result<Self, Error> {
        let len = val.len();

        // SAFETY: We assume that if `gc_alloc` returns successfully, the
        // result is a valid pointer to GC-controlled memory of at least
        // `len + 1` bytes.
        let raw = unsafe { ffi::gc_alloc(len + 1, 0) as *mut u8 };
        if raw.is_null() {
            return Err(Error::AllocationFailed);
        }

        // SAFETY: The memory should be freshly allocated and as such cannot
        // overlap with `val`; under the assumption above it has room for
        // `len` bytes of content plus the terminator at offset `len`.
        unsafe {
            ptr::copy_nonoverlapping(val.as_ptr(), raw, len);
            // Zero byte for C ASCIIZ compatibility; it sits just past the end
            // of the slice exposed to Rust.
            raw.add(len).write(0);
        }

        Ok(Self { ptr: raw, len })
    }

    /// Returns the string contents as raw bytes; a null pointer is treated
    /// as an empty slice.
    fn as_bytes(&self) -> &[u8] {
        if self.ptr.is_null() {
            return &[];
        }
        // SAFETY: `ptr` is non-null here. We assume it points to `len` bytes
        // that remain valid and unmodified for as long as `self` is borrowed.
        unsafe { slice::from_raw_parts(self.ptr, self.len) }
    }
}
impl Default for StrBuffer {
fn default() -> Self {
Self::empty()
}
}
impl TryFrom<Obj> for StrBuffer {
@ -154,7 +67,21 @@ impl TryFrom<Obj> for StrBuffer {
fn try_from(obj: Obj) -> Result<Self, Self::Error> {
if obj.is_qstr() || unsafe { ffi::mp_type_str.is_type_of(obj) } {
Ok(Self(Buffer::try_from(obj)?))
let bufinfo = get_buffer_info(obj, ffi::MP_BUFFER_READ)?;
let new = Self {
ptr: bufinfo.buf as _,
len: bufinfo.len as _,
};
// MicroPython _should_ ensure that values of type `str` are UTF-8.
// Rust seems to be stricter in what it considers UTF-8 though.
// If there is a mismatch, we return an error.
let bytes = new.as_bytes();
if str::from_utf8(bytes).is_err() {
return Err(Error::TypeError);
}
Ok(new)
} else {
Err(Error::TypeError)
}
@ -171,17 +98,21 @@ impl Deref for StrBuffer {
impl AsRef<str> for StrBuffer {
fn as_ref(&self) -> &str {
// MicroPython _should_ ensure that values of type `str` are UTF-8.
// Rust seems to be stricter in what it considers UTF-8 though.
// In case there's a mismatch, this code will cleanly panic
// before attempting to use the data.
unwrap!(str::from_utf8(self.0.as_ref()), "Invalid internal UTF-8.")
// SAFETY:
// - If constructed from a Rust `&str`, this is safe.
// - If constructed from a MicroPython string, we check validity of UTF-8 at
// construction time. Python semantics promise not to mutate the underlying
// data from under us.
unsafe { str::from_utf8_unchecked(self.as_bytes()) }
}
}
impl From<&'static str> for StrBuffer {
fn from(val: &'static str) -> Self {
Self(Buffer::from(val.as_bytes()))
Self {
ptr: val.as_ptr(),
len: val.len(),
}
}
}
@ -192,9 +123,7 @@ fn get_buffer_info(obj: Obj, flags: u32) -> Result<ffi::mp_buffer_info_t, Error>
typecode: 0,
};
// SAFETY: We assume that if `ffi::mp_get_buffer` returns successfully,
// `bufinfo.buf` contains a pointer to data of `bufinfo.len` bytes. Later
// we consider this data either GC-allocated or effectively `'static`, embedding
// them in `Buffer`/`BufferMut`.
// `bufinfo.buf` contains a pointer to data of `bufinfo.len` bytes.
// EXCEPTION: Does not raise for MicroPython's builtin types, and we don't
// implement custom buffer protocols.
if unsafe { ffi::mp_get_buffer(obj, &mut bufinfo, flags as _) } {
@ -204,36 +133,53 @@ fn get_buffer_info(obj: Obj, flags: u32) -> Result<ffi::mp_buffer_info_t, Error>
}
}
fn buffer_as_ref<'a>(ptr: *const u8, len: usize) -> &'a [u8] {
if ptr.is_null() {
// `ptr` can be null if len == 0.
&[]
/// Get an immutable reference to a buffer from a MicroPython object.
///
/// SAFETY:
/// The caller is responsible for ensuring immutability of the returned buffer,
/// in particular that:
/// (a) no mutable reference to the same buffer is held at the same time,
/// (b) the buffer is not modified in MicroPython while the reference to it is
/// being held.
pub unsafe fn get_buffer<'a>(obj: Obj) -> Result<&'a [u8], Error> {
let bufinfo = get_buffer_info(obj, ffi::MP_BUFFER_READ)?;
if bufinfo.buf.is_null() {
// `bufinfo.buf` can be null if len == 0.
Ok(&[])
} else {
// SAFETY: We assume that `ptr` is pointing to memory:
// - without any mutable references,
// - valid and immutable in `'a`,
// - of at least `len` bytes.
unsafe { slice::from_raw_parts(ptr, len) }
// SAFETY: We assume that `bufinfo.buf` is pointing to memory:
// - valid in `'a`
// - of at least `bufinfo.len` bytes
// The caller is responsible for ensuring that:
// - there are no mutable references
// - that the buffer is immutable in `'a`
Ok(unsafe { slice::from_raw_parts(bufinfo.buf as _, bufinfo.len) })
}
}
fn buffer_as_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut [u8] {
if ptr.is_null() {
// `ptr` can be null if len == 0.
&mut []
/// Get a mutable reference to a buffer from a MicroPython object.
///
/// SAFETY:
/// The caller is responsible for ensuring uniqueness of the mutable reference,
/// in particular that:
/// (a) no other reference to the same buffer is held at the same time,
/// (b) the buffer is not modified in MicroPython while the reference to it is
/// being held.
pub unsafe fn get_buffer_mut<'a>(obj: Obj) -> Result<&'a mut [u8], Error> {
let bufinfo = get_buffer_info(obj, ffi::MP_BUFFER_WRITE)?;
if bufinfo.buf.is_null() {
// `bufinfo.buf` can be null if len == 0.
Ok(&mut [])
} else {
// SAFETY: We assume that `ptr` is pointing to memory:
// - without any mutable references,
// SAFETY: We assume that `bufinfo.buf` is pointing to memory:
// - valid and mutable in `'a`,
// - of at least `len` bytes.
unsafe { slice::from_raw_parts_mut(ptr, len) }
}
}
#[cfg(feature = "ui_debug")]
impl crate::trace::Trace for Buffer {
fn trace(&self, t: &mut dyn crate::trace::Tracer) {
self.as_ref().trace(t)
// - of at least `bufinfo.len` bytes.
// The caller is responsible for ensuring that:
// - there are no other references
// - the buffer is not mutated outside of Rust's control.
Ok(unsafe { slice::from_raw_parts_mut(bufinfo.buf as _, bufinfo.len) })
}
}

View File

@ -5,7 +5,7 @@ use core::{
use crate::{
error::Error,
micropython::{buffer::Buffer, gc::Gc, list::List, map::Map, obj::Obj, qstr::Qstr, util},
micropython::{buffer, gc::Gc, list::List, map::Map, obj::Obj, qstr::Qstr, util},
};
use super::{
@ -40,7 +40,6 @@ pub extern "C" fn protobuf_type_for_wire(wire_id: Obj) -> Obj {
#[no_mangle]
pub extern "C" fn protobuf_decode(buf: Obj, msg_def: Obj, enable_experimental: Obj) -> Obj {
let block = || {
let buf = Buffer::try_from(buf)?;
let def = Gc::<MsgDefObj>::try_from(msg_def)?;
let enable_experimental = bool::try_from(enable_experimental)?;
@ -52,7 +51,11 @@ pub extern "C" fn protobuf_decode(buf: Obj, msg_def: Obj, enable_experimental: O
return Err(error::experimental_not_enabled());
}
let stream = &mut InputStream::new(&buf);
// SAFETY:
// We assume that for the lifetime of `buf`, no MicroPython code can run that
// would mutate the buffer, nor pass it to another Rust function.
let buf = unsafe { buffer::get_buffer(buf) }?;
let stream = &mut InputStream::new(buf);
let decoder = Decoder {
enable_experimental,
};

View File

@ -3,7 +3,7 @@ use core::convert::{TryFrom, TryInto};
use crate::{
error::Error,
micropython::{
buffer::{Buffer, BufferMut},
buffer,
gc::Gc,
iter::{Iter, IterBuf},
list::List,
@ -36,10 +36,13 @@ pub extern "C" fn protobuf_encode(buf: Obj, obj: Obj) -> Obj {
let block = || {
let obj = Gc::<MsgObj>::try_from(obj)?;
// We assume there are no other refs into `buf` at this point. This specifically
// means that no fields of `obj` should reference `buf` memory.
let buf = &mut BufferMut::try_from(buf)?;
let stream = &mut BufferStream::new(buf.as_mut());
// SAFETY:
// We assume that:
// - there are no other refs into `buf` at this point. This specifically means
// that no fields of `obj` should reference `buf` memory.
// - for the lifetime of `buf`, no Python code will mutate the contents.
let buf = unsafe { buffer::get_buffer_mut(buf)? };
let stream = &mut BufferStream::new(buf);
Encoder.encode_message(stream, &obj.def(), &obj)?;
@ -125,7 +128,8 @@ impl Encoder {
let mut len = 0;
let iter = Iter::try_from_obj_with_buf(value, &mut iter_buf)?;
for value in iter {
let buffer = Buffer::try_from(value)?;
// SAFETY: buffer is dropped immediately.
let buffer = unsafe { buffer::get_buffer(value) }?;
len += buffer.len();
}
stream.write_uvarint(len as u64)?;
@ -133,14 +137,16 @@ impl Encoder {
// Serialize the buffers one-by-one.
let iter = Iter::try_from_obj_with_buf(value, &mut iter_buf)?;
for value in iter {
let buffer = Buffer::try_from(value)?;
stream.write(&buffer)?;
// SAFETY: buffer is dropped immediately.
let buffer = unsafe { buffer::get_buffer(value) }?;
stream.write(buffer)?;
}
} else {
// Single length-delimited field.
let buffer = Buffer::try_from(value)?;
// SAFETY: buffer is dropped immediately.
let buffer = unsafe { buffer::get_buffer(value) }?;
stream.write_uvarint(buffer.len() as u64)?;
stream.write(&buffer)?;
stream.write(buffer)?;
}
}
FieldType::Msg(msg_type) => {