1
0
mirror of https://github.com/trezor/trezor-firmware.git synced 2025-01-10 15:30:55 +00:00

refactor(core/rust): more explicit buffer semantics, StrBuffer::alloc

[no changelog]
This commit is contained in:
matejcik 2022-09-05 13:10:14 +02:00 committed by matejcik
parent 5a2e831214
commit 7ba7eff118
3 changed files with 135 additions and 180 deletions

View File

@ -1,152 +1,65 @@
use core::{ use core::{convert::TryFrom, ops::Deref, ptr, slice, str};
convert::TryFrom,
ops::{Deref, DerefMut},
ptr, slice, str,
};
use crate::{error::Error, micropython::obj::Obj}; use crate::{error::Error, micropython::obj::Obj};
use super::ffi; use super::ffi;
/// Represents an immutable slice of bytes stored on the MicroPython heap and /// Represents an immutable UTF-8 string managed by MicroPython GC.
/// owned by values that obey the `MP_BUFFER_READ` buffer protocol, such as /// This either means static data, or a valid GC object.
/// `bytes`, `str`, `bytearray` or `memoryview`.
/// ///
/// # Safety /// # Safety
/// ///
/// In most cases, it is unsound to store `Buffer` values in a GC-unreachable /// We assume that MicroPython is handling strings according to Python
/// location, such as static data. It is also unsound to let the contents be /// semantics, i.e., that string data is immutable.
/// modified while a reference to them is being held. /// Furthermore, we assume that string data is always either static or a GC head
pub struct Buffer { /// pointer, i.e., we can never obtain a pointer into the middle of a GC string.
///
/// Given the above assumptions about MicroPython strings, working with
/// StrBuffers in Rust is safe.
pub struct StrBuffer {
ptr: *const u8, ptr: *const u8,
len: usize, len: usize,
} }
impl Buffer {
    /// Creates a `Buffer` that views zero bytes.
    ///
    /// The view is built from a `'static` empty byte-string literal, so no
    /// allocation takes place.
    pub fn empty() -> Self {
        const NO_BYTES: &[u8; 0] = b"";
        Self::from(NO_BYTES)
    }
}
impl TryFrom<Obj> for Buffer {
    type Error = Error;

    /// Builds a read-only view of `obj` by querying it with
    /// `ffi::MP_BUFFER_READ`.
    ///
    /// # Errors
    ///
    /// Forwards whatever error `get_buffer_info` reports for `obj`.
    fn try_from(obj: Obj) -> Result<Self, Self::Error> {
        get_buffer_info(obj, ffi::MP_BUFFER_READ).map(|info| Self {
            ptr: info.buf as _,
            len: info.len as _,
        })
    }
}
impl Default for Buffer {
fn default() -> Self {
Self::empty()
}
}
impl Deref for Buffer {
    type Target = [u8];

    /// Dereferences to the viewed bytes; delegates to the `AsRef<[u8]>`
    /// implementation of this type.
    fn deref(&self) -> &Self::Target {
        <Self as AsRef<[u8]>>::as_ref(self)
    }
}
impl AsRef<[u8]> for Buffer {
    /// Returns the viewed bytes as a shared slice, built by `buffer_as_ref`
    /// from the stored pointer and length.
    fn as_ref(&self) -> &[u8] {
        let (start, count) = (self.ptr, self.len);
        buffer_as_ref(start, count)
    }
}
impl From<&'static [u8]> for Buffer {
fn from(val: &'static [u8]) -> Self {
Buffer {
ptr: val.as_ptr(),
len: val.len(),
}
}
}
impl<const N: usize> From<&'static [u8; N]> for Buffer {
fn from(val: &'static [u8; N]) -> Self {
Buffer {
ptr: val.as_ptr(),
len: val.len(),
}
}
}
/// Represents a mutable slice of bytes stored on the MicroPython heap and
/// owned by values that obey the `MP_BUFFER_WRITE` buffer protocol, such as
/// `bytearray` or `memoryview`.
///
/// # Safety
///
/// In most cases, it is unsound to store `BufferMut` values in a GC-unreachable
/// location, such as static data. It is also unsound to let the contents be
/// modified through any other path while a reference to them is being held.
pub struct BufferMut {
// Start of the writable bytes; filled from `bufinfo.buf` in `try_from`.
ptr: *mut u8,
// Number of bytes in the view; filled from `bufinfo.len` in `try_from`.
len: usize,
}
impl TryFrom<Obj> for BufferMut {
    type Error = Error;

    /// Builds a writable view of `obj` by querying it with
    /// `ffi::MP_BUFFER_WRITE`.
    ///
    /// # Errors
    ///
    /// Forwards whatever error `get_buffer_info` reports for `obj`.
    fn try_from(obj: Obj) -> Result<Self, Self::Error> {
        match get_buffer_info(obj, ffi::MP_BUFFER_WRITE) {
            Ok(info) => Ok(Self {
                ptr: info.buf as _,
                len: info.len as _,
            }),
            Err(err) => Err(err),
        }
    }
}
impl Deref for BufferMut {
    type Target = [u8];

    /// Dereferences to the viewed bytes as a shared slice; delegates to the
    /// `AsRef<[u8]>` implementation of this type.
    fn deref(&self) -> &Self::Target {
        <Self as AsRef<[u8]>>::as_ref(self)
    }
}
impl DerefMut for BufferMut {
    /// Dereferences to the viewed bytes as an exclusive slice; delegates to
    /// the `AsMut<[u8]>` implementation of this type.
    fn deref_mut(&mut self) -> &mut Self::Target {
        <Self as AsMut<[u8]>>::as_mut(self)
    }
}
impl AsRef<[u8]> for BufferMut {
    /// Returns the viewed bytes as a shared slice, built by `buffer_as_ref`
    /// from the stored pointer and length.
    fn as_ref(&self) -> &[u8] {
        // The stored pointer is `*mut u8`; the read-only helper takes `*const u8`.
        let start: *const u8 = self.ptr;
        buffer_as_ref(start, self.len)
    }
}
impl AsMut<[u8]> for BufferMut {
    /// Returns the viewed bytes as an exclusive slice, built by
    /// `buffer_as_mut` from the stored pointer and length.
    fn as_mut(&mut self) -> &mut [u8] {
        let (start, count) = (self.ptr, self.len);
        buffer_as_mut(start, count)
    }
}
/// Represents an immutable UTF-8 string stored on the MicroPython heap and
/// owned by a `str` object.
///
/// This is a newtype over [`Buffer`]; the derived `Default` therefore yields
/// a wrapper around `Buffer::default()`.
///
/// # Safety
///
/// In most cases, it is unsound to store `StrBuffer` values in a GC-unreachable
/// location, such as static data. It is also unsound to let the contents be
/// modified while a reference to them is being held.
#[derive(Default)]
pub struct StrBuffer(Buffer);
impl StrBuffer { impl StrBuffer {
pub fn empty() -> Self { pub fn empty() -> Self {
Self::from("") Self::from("")
} }
/// Copies `val` into a fresh `ffi::gc_alloc` allocation and returns a
/// `StrBuffer` viewing the copy.
///
/// One extra byte is requested and set to zero right after the copied data,
/// for C ASCIIZ compatibility. That byte lies past `len`, so it is not part
/// of the Rust-visible string.
///
/// # Errors
///
/// Returns `Error::AllocationFailed` when `gc_alloc` hands back a null
/// pointer.
pub fn alloc(val: &str) -> Result<Self, Error> {
    let len = val.len();

    // SAFETY: We assume that if `gc_alloc` returns successfully, the result is
    // a valid pointer to GC-controlled memory of at least `len + 1` bytes.
    let dest = unsafe { ffi::gc_alloc(len + 1, 0) } as *mut u8;
    if dest.is_null() {
        return Err(Error::AllocationFailed);
    }

    // SAFETY: `dest` is non-null and, per the assumption above, has room for
    // `len + 1` bytes. The memory should be freshly allocated and as such
    // cannot overlap with `val`.
    unsafe {
        ptr::copy_nonoverlapping(val.as_ptr(), dest, len);
        // Terminating zero, written one past the last copied byte.
        dest.add(len).write(0);
    }

    Ok(Self { ptr: dest, len })
}
/// Returns the raw bytes of the string.
///
/// A null `ptr` is treated as the empty string and yields an empty slice
/// without touching memory.
fn as_bytes(&self) -> &[u8] {
    if !self.ptr.is_null() {
        // SAFETY: relies on the type's invariant that a non-null `ptr` points
        // to `len` readable bytes that stay valid and unmodified while `self`
        // is borrowed.
        return unsafe { slice::from_raw_parts(self.ptr, self.len) };
    }
    &[]
}
}
impl Default for StrBuffer {
fn default() -> Self {
Self::empty()
}
} }
impl TryFrom<Obj> for StrBuffer { impl TryFrom<Obj> for StrBuffer {
@ -154,7 +67,21 @@ impl TryFrom<Obj> for StrBuffer {
fn try_from(obj: Obj) -> Result<Self, Self::Error> { fn try_from(obj: Obj) -> Result<Self, Self::Error> {
if obj.is_qstr() || unsafe { ffi::mp_type_str.is_type_of(obj) } { if obj.is_qstr() || unsafe { ffi::mp_type_str.is_type_of(obj) } {
Ok(Self(Buffer::try_from(obj)?)) let bufinfo = get_buffer_info(obj, ffi::MP_BUFFER_READ)?;
let new = Self {
ptr: bufinfo.buf as _,
len: bufinfo.len as _,
};
// MicroPython _should_ ensure that values of type `str` are UTF-8.
// Rust seems to be stricter in what it considers UTF-8 though.
// If there is a mismatch, we return an error.
let bytes = new.as_bytes();
if str::from_utf8(bytes).is_err() {
return Err(Error::TypeError);
}
Ok(new)
} else { } else {
Err(Error::TypeError) Err(Error::TypeError)
} }
@ -171,17 +98,21 @@ impl Deref for StrBuffer {
impl AsRef<str> for StrBuffer { impl AsRef<str> for StrBuffer {
fn as_ref(&self) -> &str { fn as_ref(&self) -> &str {
// MicroPython _should_ ensure that values of type `str` are UTF-8. // SAFETY:
// Rust seems to be stricter in what it considers UTF-8 though. // - If constructed from a Rust `&str`, this is safe.
// In case there's a mismatch, this code will cleanly panic // - If constructed from a MicroPython string, we check validity of UTF-8 at
// before attempting to use the data. // construction time. Python semantics promise not to mutate the underlying
unwrap!(str::from_utf8(self.0.as_ref()), "Invalid internal UTF-8.") // data from under us.
unsafe { str::from_utf8_unchecked(self.as_bytes()) }
} }
} }
impl From<&'static str> for StrBuffer { impl From<&'static str> for StrBuffer {
fn from(val: &'static str) -> Self { fn from(val: &'static str) -> Self {
Self(Buffer::from(val.as_bytes())) Self {
ptr: val.as_ptr(),
len: val.len(),
}
} }
} }
@ -192,9 +123,7 @@ fn get_buffer_info(obj: Obj, flags: u32) -> Result<ffi::mp_buffer_info_t, Error>
typecode: 0, typecode: 0,
}; };
// SAFETY: We assume that if `ffi::mp_get_buffer` returns successfully, // SAFETY: We assume that if `ffi::mp_get_buffer` returns successfully,
// `bufinfo.buf` contains a pointer to data of `bufinfo.len` bytes. Later // `bufinfo.buf` contains a pointer to data of `bufinfo.len` bytes.
// we consider this data either GC-allocated or effectively `'static`, embedding
// them in `Buffer`/`BufferMut`.
// EXCEPTION: Does not raise for Micropython's builtin types, and we don't // EXCEPTION: Does not raise for Micropython's builtin types, and we don't
// implement custom buffer protocols. // implement custom buffer protocols.
if unsafe { ffi::mp_get_buffer(obj, &mut bufinfo, flags as _) } { if unsafe { ffi::mp_get_buffer(obj, &mut bufinfo, flags as _) } {
@ -204,36 +133,53 @@ fn get_buffer_info(obj: Obj, flags: u32) -> Result<ffi::mp_buffer_info_t, Error>
} }
} }
fn buffer_as_ref<'a>(ptr: *const u8, len: usize) -> &'a [u8] { /// Get an immutable reference to a buffer from a MicroPython object.
if ptr.is_null() { ///
// `ptr` can be null if len == 0. /// SAFETY:
&[] /// The caller is responsible for ensuring immutability of the returned buffer,
/// in particular that:
/// (a) no mutable reference to the same buffer is held at the same time,
/// (b) the buffer is not modified in MicroPython while the reference to it is
/// being held.
pub unsafe fn get_buffer<'a>(obj: Obj) -> Result<&'a [u8], Error> {
let bufinfo = get_buffer_info(obj, ffi::MP_BUFFER_READ)?;
if bufinfo.buf.is_null() {
// `bufinfo.buf` can be null if len == 0.
Ok(&[])
} else { } else {
// SAFETY: We assume that `ptr` is pointing to memory: // SAFETY: We assume that `bufinfo.buf` is pointing to memory:
// - without any mutable references, // - valid in `'a`
// - valid and immutable in `'a`, // - of at least `bufinfo.len` bytes
// - of at least `len` bytes. // The caller is responsible for ensuring that:
unsafe { slice::from_raw_parts(ptr, len) } // - there are no mutable references
// - that the buffer is immutable in `'a`
Ok(unsafe { slice::from_raw_parts(bufinfo.buf as _, bufinfo.len) })
} }
} }
fn buffer_as_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut [u8] { /// Get a mutable reference to a buffer from a MicroPython object.
if ptr.is_null() { ///
// `ptr` can be null if len == 0. /// SAFETY:
&mut [] /// The caller is responsible for ensuring uniqueness of the mutable reference,
/// in particular that:
/// (a) no other reference to the same buffer is held at the same time,
/// (b) the buffer is not modified in MicroPython while the reference to it is
/// being held.
pub unsafe fn get_buffer_mut<'a>(obj: Obj) -> Result<&'a mut [u8], Error> {
let bufinfo = get_buffer_info(obj, ffi::MP_BUFFER_WRITE)?;
if bufinfo.buf.is_null() {
// `bufinfo.buf` can be null if len == 0.
Ok(&mut [])
} else { } else {
// SAFETY: We assume that `ptr` is pointing to memory: // SAFETY: We assume that `bufinfo.buf` is pointing to memory:
// - without any mutable references,
// - valid and mutable in `'a`, // - valid and mutable in `'a`,
// - of at least `len` bytes. // - of at least `bufinfo.len` bytes.
unsafe { slice::from_raw_parts_mut(ptr, len) } // The caller is responsible for ensuring that:
} // - there are no other references
} // - the buffer is not mutated outside of Rust's control.
Ok(unsafe { slice::from_raw_parts_mut(bufinfo.buf as _, bufinfo.len) })
#[cfg(feature = "ui_debug")]
impl crate::trace::Trace for Buffer {
fn trace(&self, t: &mut dyn crate::trace::Tracer) {
self.as_ref().trace(t)
} }
} }

View File

@ -5,7 +5,7 @@ use core::{
use crate::{ use crate::{
error::Error, error::Error,
micropython::{buffer::Buffer, gc::Gc, list::List, map::Map, obj::Obj, qstr::Qstr, util}, micropython::{buffer, gc::Gc, list::List, map::Map, obj::Obj, qstr::Qstr, util},
}; };
use super::{ use super::{
@ -40,7 +40,6 @@ pub extern "C" fn protobuf_type_for_wire(wire_id: Obj) -> Obj {
#[no_mangle] #[no_mangle]
pub extern "C" fn protobuf_decode(buf: Obj, msg_def: Obj, enable_experimental: Obj) -> Obj { pub extern "C" fn protobuf_decode(buf: Obj, msg_def: Obj, enable_experimental: Obj) -> Obj {
let block = || { let block = || {
let buf = Buffer::try_from(buf)?;
let def = Gc::<MsgDefObj>::try_from(msg_def)?; let def = Gc::<MsgDefObj>::try_from(msg_def)?;
let enable_experimental = bool::try_from(enable_experimental)?; let enable_experimental = bool::try_from(enable_experimental)?;
@ -52,7 +51,11 @@ pub extern "C" fn protobuf_decode(buf: Obj, msg_def: Obj, enable_experimental: O
return Err(error::experimental_not_enabled()); return Err(error::experimental_not_enabled());
} }
let stream = &mut InputStream::new(&buf); // SAFETY:
// We assume that for the lifetime of `buf`, no MicroPython code can run that
// would mutate the buffer, nor pass it to another Rust function.
let buf = unsafe { buffer::get_buffer(buf) }?;
let stream = &mut InputStream::new(buf);
let decoder = Decoder { let decoder = Decoder {
enable_experimental, enable_experimental,
}; };

View File

@ -3,7 +3,7 @@ use core::convert::{TryFrom, TryInto};
use crate::{ use crate::{
error::Error, error::Error,
micropython::{ micropython::{
buffer::{Buffer, BufferMut}, buffer,
gc::Gc, gc::Gc,
iter::{Iter, IterBuf}, iter::{Iter, IterBuf},
list::List, list::List,
@ -36,10 +36,13 @@ pub extern "C" fn protobuf_encode(buf: Obj, obj: Obj) -> Obj {
let block = || { let block = || {
let obj = Gc::<MsgObj>::try_from(obj)?; let obj = Gc::<MsgObj>::try_from(obj)?;
// We assume there are no other refs into `buf` at this point. This specifically // SAFETY:
// means that no fields of `obj` should reference `buf` memory. // We assume that:
let buf = &mut BufferMut::try_from(buf)?; // - there are no other refs into `buf` at this point. This specifically means
let stream = &mut BufferStream::new(buf.as_mut()); // that no fields of `obj` should reference `buf` memory.
// - for the lifetime of `buf`, no Python code will mutate the contents.
let buf = unsafe { buffer::get_buffer_mut(buf)? };
let stream = &mut BufferStream::new(buf);
Encoder.encode_message(stream, &obj.def(), &obj)?; Encoder.encode_message(stream, &obj.def(), &obj)?;
@ -125,7 +128,8 @@ impl Encoder {
let mut len = 0; let mut len = 0;
let iter = Iter::try_from_obj_with_buf(value, &mut iter_buf)?; let iter = Iter::try_from_obj_with_buf(value, &mut iter_buf)?;
for value in iter { for value in iter {
let buffer = Buffer::try_from(value)?; // SAFETY: buffer is dropped immediately.
let buffer = unsafe { buffer::get_buffer(value) }?;
len += buffer.len(); len += buffer.len();
} }
stream.write_uvarint(len as u64)?; stream.write_uvarint(len as u64)?;
@ -133,14 +137,16 @@ impl Encoder {
// Serialize the buffers one-by-one. // Serialize the buffers one-by-one.
let iter = Iter::try_from_obj_with_buf(value, &mut iter_buf)?; let iter = Iter::try_from_obj_with_buf(value, &mut iter_buf)?;
for value in iter { for value in iter {
let buffer = Buffer::try_from(value)?; // SAFETY: buffer is dropped immediately.
stream.write(&buffer)?; let buffer = unsafe { buffer::get_buffer(value) }?;
stream.write(buffer)?;
} }
} else { } else {
// Single length-delimited field. // Single length-delimited field.
let buffer = Buffer::try_from(value)?; // SAFETY: buffer is dropped immediately.
let buffer = unsafe { buffer::get_buffer(value) }?;
stream.write_uvarint(buffer.len() as u64)?; stream.write_uvarint(buffer.len() as u64)?;
stream.write(&buffer)?; stream.write(buffer)?;
} }
} }
FieldType::Msg(msg_type) => { FieldType::Msg(msg_type) => {