2019-07-03 13:07:04 +00:00
|
|
|
"""
|
2017-07-04 16:09:08 +00:00
|
|
|
Extremely minimal streaming codec for a subset of protobuf. Supports unsigned and
signed varints, bool, bytes, string, embedded message and repeated fields.
|
2019-07-03 13:07:04 +00:00
|
|
|
"""
|
2017-08-21 11:22:35 +00:00
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
from micropython import const
|
2017-08-21 11:22:35 +00:00
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
if False:
    # This branch never executes at runtime; the imports and protocol classes
    # below exist only so that static type checkers can resolve the annotations
    # used throughout this module.
    from typing import Any, Dict, List, Type, TypeVar
    from typing_extensions import Protocol

    class AsyncReader(Protocol):
        # Structural type of the byte source consumed by the load_* functions.
        async def areadinto(self, buf: bytearray) -> int:
            """
            Reads `len(buf)` bytes into `buf`, or raises `EOFError`.
            """

    class AsyncWriter(Protocol):
        # Structural type of the byte sink used by the dump_* functions.
        async def awrite(self, buf: bytes) -> int:
            """
            Writes all bytes from `buf`, or raises `EOFError`.
            """
|
2016-09-21 12:14:49 +00:00
|
|
|
|
2016-09-29 10:29:43 +00:00
|
|
|
|
2017-07-04 16:09:08 +00:00
|
|
|
# Module-level one-byte scratch buffer reused by load_uvarint and dump_uvarint
# for every byte they read or write.
_UVARINT_BUFFER = bytearray(1)
|
2016-09-21 12:14:49 +00:00
|
|
|
|
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
async def load_uvarint(reader: AsyncReader) -> int:
    """
    Read one unsigned varint from `reader` and return it as an int.

    Bytes are consumed one at a time through the shared scratch buffer. Each
    byte contributes its low 7 bits, least-significant group first, and a set
    high bit (0x80) means another byte follows. Whatever `reader.areadinto`
    raises (e.g. `EOFError`) propagates to the caller.
    """
    scratch = _UVARINT_BUFFER
    value = 0
    offset = 0
    while True:
        await reader.areadinto(scratch)
        octet = scratch[0]
        value += (octet & 0x7F) << offset
        offset += 7
        if not octet & 0x80:
            # continuation bit is clear: this was the last byte
            return value
|
2016-09-21 12:14:49 +00:00
|
|
|
|
2016-10-14 13:29:14 +00:00
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
async def dump_uvarint(writer: AsyncWriter, n: int) -> None:
    """
    Write the non-negative integer `n` to `writer` as an unsigned varint.

    The value is emitted 7 bits per byte, least-significant group first, with
    the high bit (0x80) set on every byte except the last. Each byte is written
    through the shared scratch buffer.

    Raises `ValueError` if `n` is negative.
    """
    if n < 0:
        raise ValueError("Cannot dump signed value, convert it to unsigned first.")
    scratch = _UVARINT_BUFFER
    while True:
        rest = n >> 7
        low = n & 0x7F
        if rest:
            # more groups follow, flag it with the continuation bit
            low |= 0x80
        scratch[0] = low
        await writer.awrite(scratch)
        if not rest:
            return
        n = rest
|
2016-10-05 10:24:55 +00:00
|
|
|
|
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
def count_uvarint(n: int) -> int:
    """
    Return the number of bytes `n` occupies when encoded as an unsigned varint.

    Each encoded byte carries 7 bits of payload, so values up to 64 bits wide
    need between 1 and 10 bytes. The full unsigned 64-bit range is accepted so
    that the result agrees with what `dump_uvarint` writes, including for the
    values `sint_to_uint` produces from large-magnitude negative numbers.

    Raises `ValueError` if `n` is negative or does not fit into 64 bits.
    """
    if n < 0:
        raise ValueError("Cannot dump signed value, convert it to unsigned first.")
    if n > 0xFFFFFFFFFFFFFFFF:
        raise ValueError("Value does not fit into a 64-bit varint.")
    nbytes = 1
    # every additional 7-bit group beyond the first costs one more byte
    while n > 0x7F:
        n >>= 7
        nbytes += 1
    return nbytes
|
|
|
|
|
|
|
|
|
2018-05-09 11:25:07 +00:00
|
|
|
# protobuf interleaved signed encoding:
|
|
|
|
# https://developers.google.com/protocol-buffers/docs/encoding#structure
|
|
|
|
# the idea is to save the sign in LSbit instead of twos-complement.
|
|
|
|
# so counting up, you go: 0, -1, 1, -2, 2, ... (as the first bit changes, sign flips)
|
|
|
|
#
|
|
|
|
# To achieve this with a twos-complement number:
|
|
|
|
# 1. shift left by 1, leaving LSbit free
|
|
|
|
# 2. if the number is negative, do bitwise negation.
|
|
|
|
# This keeps positive number the same, and converts negative from twos-complement
|
|
|
|
# to the appropriate value, while setting the sign bit.
|
|
|
|
#
|
|
|
|
# The original algorithm makes use of the fact that arithmetic (signed) shift
|
|
|
|
# keeps the sign bits, so for a n-bit number, (x >> n) gets us "all sign bits".
|
|
|
|
# Then you can take "number XOR all-sign-bits", which is XOR 0 (identity) for positive
|
|
|
|
# and XOR 1 (bitwise negation) for negative. Cute and efficient.
|
|
|
|
#
|
|
|
|
# But this is harder in Python because we don't natively know the bit size of the number.
|
|
|
|
# So we have to branch on whether the number is negative.
|
|
|
|
|
2018-07-03 14:20:58 +00:00
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
def sint_to_uint(sint: int) -> int:
    """
    Map a signed integer onto the unsigned interleaved encoding.

    The value is shifted left by one bit; for negative inputs the shifted value
    is additionally bitwise-negated, which leaves the sign in the lowest bit
    (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
    """
    doubled = sint << 1
    return ~doubled if sint < 0 else doubled
|
|
|
|
|
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
def uint_to_sint(uint: int) -> int:
    """
    Decode an unsigned interleaved value back into a signed integer.

    The lowest bit carries the sign: the remaining bits are shifted right by
    one and bitwise-negated when that bit is set
    (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
    """
    magnitude = uint >> 1
    return ~magnitude if uint & 1 else magnitude
|
2016-04-07 21:45:10 +00:00
|
|
|
|
|
|
|
|
2018-05-09 11:25:07 +00:00
|
|
|
class UVarintType:
    # Schema marker for unsigned integer fields; load_message and dump_message
    # handle the value as a plain uvarint.
    WIRE_TYPE = 0  # wire type 0: the payload is a single varint
|
|
|
|
|
|
|
|
|
2018-05-09 11:25:07 +00:00
|
|
|
class SVarintType:
    # Schema marker for signed integer fields; values are converted with
    # sint_to_uint / uint_to_sint around the uvarint encoding.
    WIRE_TYPE = 0  # wire type 0: the payload is a single varint
|
|
|
|
|
|
|
|
|
2017-07-04 16:09:08 +00:00
|
|
|
class BoolType:
    # Schema marker for boolean fields; values are written as int(value) and
    # read back as bool(value) of a uvarint.
    WIRE_TYPE = 0  # wire type 0: the payload is a single varint
|
2016-08-05 10:35:45 +00:00
|
|
|
|
2016-04-07 21:45:10 +00:00
|
|
|
|
2017-07-04 16:09:08 +00:00
|
|
|
class BytesType:
    # Schema marker for raw byte fields; loaded values are bytearrays.
    WIRE_TYPE = 2  # wire type 2: uvarint length followed by that many bytes
|
2016-04-07 21:45:10 +00:00
|
|
|
|
|
|
|
|
2017-07-04 16:09:08 +00:00
|
|
|
class UnicodeType:
    # Schema marker for text fields; values are str.encode()d when dumped and
    # bytes.decode()d when loaded.
    WIRE_TYPE = 2  # wire type 2: uvarint length followed by that many bytes
|
|
|
|
|
2016-09-21 12:14:49 +00:00
|
|
|
|
2017-07-04 16:09:08 +00:00
|
|
|
class MessageType:
    """
    Base class for messages handled by this codec.

    Subclasses describe their layout through `get_fields()`, a dict mapping a
    field tag to a `(name, type, flags)` tuple. Field values live as plain
    instance attributes.
    """

    # Embedded messages are length-delimited on the wire.
    WIRE_TYPE = 2

    # Type id for the wire codec.
    # Technically, not every protobuf message has this.
    MESSAGE_WIRE_TYPE = -1

    @classmethod
    def get_fields(cls) -> Dict:
        """Return the field schema of this message; the base class has none."""
        return dict()

    def __init__(self, **kwargs: Any) -> None:
        """Store every keyword argument as an attribute of the same name."""
        for name, value in kwargs.items():
            setattr(self, name, value)

    def __eq__(self, rhs: Any) -> bool:
        """Messages are equal when they share the exact class and attributes."""
        if self.__class__ is not rhs.__class__:
            return False
        return self.__dict__ == rhs.__dict__

    def __repr__(self) -> str:
        """Return the class name wrapped in angle brackets."""
        return "<%s>" % type(self).__name__
|
2016-11-08 17:49:58 +00:00
|
|
|
|
2017-07-04 16:09:08 +00:00
|
|
|
|
|
|
|
class LimitedReader:
    """
    Reader wrapper that allows at most `limit` bytes to be read in total.

    `load_message` uses it to confine the parsing of an embedded message to
    its declared length.
    """

    def __init__(self, reader: AsyncReader, limit: int) -> None:
        self.reader, self.limit = reader, limit

    async def areadinto(self, buf: bytearray) -> int:
        """
        Fill `buf` from the wrapped reader and charge the bytes to the budget.

        Raises `EOFError` without touching the wrapped reader when `buf` is
        larger than the remaining budget. Otherwise returns whatever the
        wrapped reader returned.
        """
        if len(buf) > self.limit:
            raise EOFError
        count = await self.reader.areadinto(buf)
        self.limit -= count
        return count
|
|
|
|
|
|
|
|
|
|
|
|
# Field flag bit: the field is repeated, i.e. its attribute value is a list and
# every element is encoded as a separate key/value pair.
FLAG_REPEATED = const(1)

if False:
    # Never executed at runtime; declares the type variable used to annotate
    # load_message so that it returns the same message class it was given.
    LoadedMessageType = TypeVar("LoadedMessageType", bound=MessageType)
|
|
|
|
|
2017-07-04 16:09:08 +00:00
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
async def load_message(
    reader: AsyncReader, msg_type: Type[LoadedMessageType]
) -> LoadedMessageType:
    """
    Parse one message of type `msg_type` from `reader` and return it.

    Fields are read until the reader raises `EOFError` while a field key is
    being loaded. Unknown tags with wire type 0 or 2 are skipped. Known fields
    are decoded according to the schema from `msg_type.get_fields()` and stored
    as attributes on a fresh `msg_type()` instance; repeated fields accumulate
    into a list. Schema fields absent from the stream are set to `None`.

    Raises `ValueError` for an unknown field with an unsupported wire type and
    `TypeError` when the wire type disagrees with the schema or the schema
    names an unsupported field type.
    """
    fields = msg_type.get_fields()
    msg = msg_type()

    while True:
        try:
            fkey = await load_uvarint(reader)
        except EOFError:
            # NOTE(review): an EOFError raised part-way through a multi-byte key
            # is also caught here and treated as a clean end of the message.
            break  # no more fields to load

        # the key packs the field tag above the 3-bit wire type
        ftag = fkey >> 3
        wtype = fkey & 7

        field = fields.get(ftag, None)

        if field is None:  # unknown field, skip it
            if wtype == 0:
                # varint payload: read and discard it
                await load_uvarint(reader)
            elif wtype == 2:
                # length-delimited payload: read the length, then discard that many bytes
                ivalue = await load_uvarint(reader)
                await reader.areadinto(bytearray(ivalue))
            else:
                raise ValueError
            continue

        fname, ftype, fflags = field
        if wtype != ftype.WIRE_TYPE:
            raise TypeError  # parsed wire type differs from the schema

        # for wire type 0 this is the value itself, for wire type 2 the payload length
        ivalue = await load_uvarint(reader)

        if ftype is UVarintType:
            fvalue = ivalue
        elif ftype is SVarintType:
            fvalue = uint_to_sint(ivalue)
        elif ftype is BoolType:
            fvalue = bool(ivalue)
        elif ftype is BytesType:
            fvalue = bytearray(ivalue)
            await reader.areadinto(fvalue)
        elif ftype is UnicodeType:
            fvalue = bytearray(ivalue)
            await reader.areadinto(fvalue)
            fvalue = bytes(fvalue).decode()
        elif issubclass(ftype, MessageType):
            # the nested parser sees EOFError once `ivalue` bytes have been consumed
            fvalue = await load_message(LimitedReader(reader, ivalue), ftype)
        else:
            raise TypeError  # field type is unknown

        if fflags & FLAG_REPEATED:
            # append to the list collected so far (or start a new one)
            pvalue = getattr(msg, fname, [])
            pvalue.append(fvalue)
            fvalue = pvalue
        setattr(msg, fname, fvalue)

    # fill missing fields
    for tag in fields:
        field = fields[tag]
        if not hasattr(msg, field[0]):
            setattr(msg, field[0], None)

    return msg
|
|
|
|
|
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
async def dump_message(
    writer: AsyncWriter, msg: MessageType, fields: Dict = None
) -> None:
    """
    Serialize `msg` to `writer`, field by field.

    `fields` is the schema dict (tag -> (name, type, flags)); when omitted it
    is taken from `msg.get_fields()`. Attributes that are missing or `None`
    are skipped. Every value of a repeated field is written with its own key.
    A bytes value may also be a list of chunks, which is written as one
    length-delimited payload. Embedded messages are prefixed with the size
    computed by `count_message`.

    Raises `TypeError` when the schema names an unsupported field type.
    """
    # reusable one-element list so that single values share the repeated path
    single = [0]

    if fields is None:
        fields = msg.get_fields()

    for tag in fields:
        name, ftype, flags = fields[tag]

        value = getattr(msg, name, None)
        if value is None:
            continue

        key = (tag << 3) | ftype.WIRE_TYPE

        if flags & FLAG_REPEATED:
            items = value
        else:
            single[0] = value
            items = single

        # the schema of an embedded message is looked up once per field
        subfields = ftype.get_fields() if issubclass(ftype, MessageType) else None

        for item in items:
            await dump_uvarint(writer, key)

            if ftype is UVarintType:
                await dump_uvarint(writer, item)

            elif ftype is SVarintType:
                await dump_uvarint(writer, sint_to_uint(item))

            elif ftype is BoolType:
                await dump_uvarint(writer, int(item))

            elif ftype is BytesType:
                if isinstance(item, list):
                    # list of chunks: one combined length, then each chunk in turn
                    await dump_uvarint(writer, _count_bytes_list(item))
                    for chunk in item:
                        await writer.awrite(chunk)
                else:
                    await dump_uvarint(writer, len(item))
                    await writer.awrite(item)

            elif ftype is UnicodeType:
                encoded = item.encode()
                await dump_uvarint(writer, len(encoded))
                await writer.awrite(encoded)

            elif issubclass(ftype, MessageType):
                await dump_uvarint(writer, count_message(item, subfields))
                await dump_message(writer, item, subfields)

            else:
                raise TypeError
|
2018-10-01 09:31:45 +00:00
|
|
|
|
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
def count_message(msg: MessageType, fields: Dict = None) -> int:
    """
    Compute the encoded size of `msg` in bytes.

    `fields` is the schema dict (tag -> (name, type, flags)); when omitted it
    is taken from `msg.get_fields()`. Attributes that are missing or `None`
    contribute nothing. The total is the sum of all field keys plus all
    payloads, with length prefixes included for bytes, strings and embedded
    messages.

    Raises `TypeError` when the schema names an unsupported field type.
    """
    total = 0
    # reusable one-element list so that single values share the repeated path
    single = [0]

    if fields is None:
        fields = msg.get_fields()

    for tag in fields:
        name, ftype, flags = fields[tag]

        value = getattr(msg, name, None)
        if value is None:
            continue

        key = (tag << 3) | ftype.WIRE_TYPE

        if flags & FLAG_REPEATED:
            items = value
        else:
            single[0] = value
            items = single

        # the key is repeated in front of every value
        total += count_uvarint(key) * len(items)

        if ftype is UVarintType:
            for item in items:
                total += count_uvarint(item)

        elif ftype is SVarintType:
            for item in items:
                total += count_uvarint(sint_to_uint(item))

        elif ftype is BoolType:
            for item in items:
                total += count_uvarint(int(item))

        elif ftype is BytesType:
            for item in items:
                if isinstance(item, list):
                    size = _count_bytes_list(item)
                else:
                    size = len(item)
                # length prefix, then the payload itself
                total += count_uvarint(size)
                total += size

        elif ftype is UnicodeType:
            for item in items:
                size = len(item.encode())
                total += count_uvarint(size)
                total += size

        elif issubclass(ftype, MessageType):
            subfields = ftype.get_fields()
            for item in items:
                size = count_message(item, subfields)
                total += count_uvarint(size)
                total += size
            del subfields

        else:
            raise TypeError

    return total
|
2018-11-02 07:10:35 +00:00
|
|
|
|
|
|
|
|
2019-07-03 13:07:04 +00:00
|
|
|
def _count_bytes_list(svalue: List[bytes]) -> int:
    """Return the combined length of all chunks in `svalue`."""
    return sum(len(chunk) for chunk in svalue)
|