first commit

This commit is contained in:
Maurice 2025-02-13 22:41:18 +01:00
commit deb12ed037
8 changed files with 646 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

16
Cargo.lock generated Normal file
View File

@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "log"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
[[package]]
name = "plabble-serializer"
version = "0.1.0"
dependencies = [
"log",
]

7
Cargo.toml Normal file
View File

@ -0,0 +1,7 @@
[package]
name = "plabble-serializer"
version = "0.1.0"
edition = "2021"
[dependencies]
log = "0.4.25"

1
src/lib.rs Normal file
View File

@ -0,0 +1 @@
mod schema;

242
src/schema/deserializer.rs Normal file
View File

@ -0,0 +1,242 @@
use std::collections::BTreeMap;
use super::{
dyn_int, LengthDetermination, OptionalCondition, ParseError, ParseResult, SchemaDataType, ValueDataType
};
impl SchemaDataType {
pub fn parse(
&self,
data: &[u8],
bit_start: usize,
context: Option<&BTreeMap<usize, ValueDataType>>,
) -> Result<ParseResult, ParseError> {
// if bit start > 0, make sure the datatype supports that
if bit_start > 0 {
match self {
SchemaDataType::U4 => (),
SchemaDataType::I4 => (),
SchemaDataType::BitFlag => (),
SchemaDataType::Object(_) => (),
SchemaDataType::Array(..) => (),
_ => return Err(ParseError::InvalidBitStart),
};
}
match self {
SchemaDataType::Array(schema_data_type, length_determination) => {
let length = match length_determination {
LengthDetermination::AvailableDataLength => data.len(),
LengthDetermination::NumberWithKey(nr) => context_get_length(context, *nr)?,
LengthDetermination::Fixed(len) => *len,
};
let mut context = BTreeMap::new();
let mut offset = 0;
let mut bit_start = bit_start;
for key in 0..length {
let parsed =
schema_data_type.parse(&data[offset..], bit_start, Some(&context))?;
offset += parsed.0;
context.insert(key, parsed.1);
bit_start = parsed.2;
}
let items = context.into_values().collect();
Ok(ParseResult(offset, ValueDataType::Array(items), bit_start))
}
SchemaDataType::U4 => {
let start = next_bitstart_and_byte_read(bit_start, 4)?;
if bit_start > 4 { // TODO not sure if this check is reduntant
Err(ParseError::InvalidBitStart)
} else {
let val = u8::from_be_bytes([data[0]]);
let mask = 0xF << bit_start;
let result = (val & mask) >> bit_start;
let bytes_read = if start.1 { 1 } else { 0 };
Ok(ParseResult(bytes_read, ValueDataType::U4(result), start.0))
}
}
SchemaDataType::I4 => {
let start = next_bitstart_and_byte_read(bit_start, 4)?;
if bit_start > 4 { // TODO not sure if this check is reduntant
Err(ParseError::InvalidBitStart)
} else {
let val = i8::from_be_bytes([data[0]]);
let mask = 0xF << bit_start;
let result = (val & mask) >> bit_start;
let bytes_read = if start.1 { 1 } else { 0 };
Ok(ParseResult(
bytes_read,
ValueDataType::I4(result as i8),
start.0,
))
}
}
SchemaDataType::U8 => Ok(ParseResult(1, ValueDataType::U8(data[0]), bit_start)),
SchemaDataType::I8 => Ok(ParseResult(1, ValueDataType::I8(data[0] as i8), bit_start)),
SchemaDataType::U16 => data
.get(0..2)
.and_then(|b| b.try_into().ok())
.map(|bytes| {
ParseResult(2, ValueDataType::U16(u16::from_be_bytes(bytes)), bit_start)
})
.ok_or_else(|| ParseError::NotEnoughBytes(2, data.len())),
SchemaDataType::I16 => data
.get(0..2)
.and_then(|b| b.try_into().ok())
.map(|bytes| {
ParseResult(2, ValueDataType::I16(i16::from_be_bytes(bytes)), bit_start)
})
.ok_or_else(|| ParseError::NotEnoughBytes(2, data.len())),
SchemaDataType::U32 => data
.get(0..4)
.and_then(|b| b.try_into().ok())
.map(|bytes| {
ParseResult(4, ValueDataType::U32(u32::from_be_bytes(bytes)), bit_start)
})
.ok_or_else(|| ParseError::NotEnoughBytes(4, data.len())),
SchemaDataType::I32 => data
.get(0..4)
.and_then(|b| b.try_into().ok())
.map(|bytes| {
ParseResult(4, ValueDataType::I32(i32::from_be_bytes(bytes)), bit_start)
})
.ok_or_else(|| ParseError::NotEnoughBytes(4, data.len())),
SchemaDataType::UDynamic => {
let (val, bytes_read): (u128, usize) = dyn_int::read_from_slice(data)?;
Ok(ParseResult(
bytes_read,
ValueDataType::UDynamic(val),
bit_start,
))
}
SchemaDataType::IDynamic => {
let (val, bytes_read): (u128, usize) = dyn_int::read_from_slice(data)?;
Ok(ParseResult(
bytes_read,
ValueDataType::IDynamic(val as i128),
bit_start,
))
}
SchemaDataType::BitFlag => {
let start = next_bitstart_and_byte_read(bit_start, 1)?;
let bit = data[0] & (1 << bit_start) != 0;
let bytes_read = if start.1 { 1 } else { 0 };
Ok(ParseResult(
bytes_read,
ValueDataType::BitFlag(bit),
start.0,
))
}
SchemaDataType::String(length_determination) => {
let length = match length_determination {
LengthDetermination::AvailableDataLength => data.len(),
LengthDetermination::NumberWithKey(nr) => context_get_length(context, *nr)?,
LengthDetermination::Fixed(len) => *len,
};
let utf8 = data
.get(..length)
.ok_or_else(|| ParseError::NotEnoughBytes(length, data.len()))?
.to_vec();
let str: String = String::from_utf8(utf8).map_err(|_| ParseError::ParsingFailed)?;
Ok(ParseResult(length, ValueDataType::String(str), bit_start))
}
SchemaDataType::Optional(schema_data_type, optional_condition) => {
let available = match optional_condition {
OptionalCondition::IfDataAvailable => data.len() > 0,
OptionalCondition::ByBitFlag(nr) => context_is_flag_set(context, *nr)?,
};
if available {
schema_data_type.parse(data, bit_start, context)
} else {
Ok(ParseResult(0, ValueDataType::Optional(None), bit_start))
}
}
SchemaDataType::Object(map) => {
let mut context = BTreeMap::new();
let mut offset = 0;
let mut bit_start = bit_start;
for (key, schema) in map {
let parsed = schema.parse(&data[offset..], bit_start, Some(&context))?;
offset += parsed.0;
context.insert(*key, parsed.1);
bit_start = parsed.2;
}
Ok(ParseResult(
offset,
ValueDataType::Object(context),
bit_start,
))
}
}
}
}
/// Advances the bit cursor after reading `bits_read` bits starting at `bit_start`.
///
/// Returns the bit offset for the next read and whether the current byte has
/// been fully consumed. A read that would run past the end of the byte is
/// rejected with `ParseError::InvalidBitStart`.
fn next_bitstart_and_byte_read(
    bit_start: usize,
    bits_read: usize,
) -> Result<(usize, bool), ParseError> {
    match bit_start + bits_read {
        // Exactly at the byte boundary: wrap to bit 0 of the next byte.
        8 => Ok((0, true)),
        end if end < 8 => Ok((end, false)),
        _ => Err(ParseError::InvalidBitStart),
    }
}
/// Looks up the bit flag stored in `slot` of the parsing context.
///
/// Fails with `NoContextAvailable` when there is no context at all, with
/// `MissingContext` when the slot is empty, and with `InvalidContext` when the
/// slot holds something other than a bit flag.
fn context_is_flag_set(
    context: Option<&BTreeMap<usize, ValueDataType>>,
    slot: usize,
) -> Result<bool, ParseError> {
    let values = context.ok_or(ParseError::NoContextAvailable)?;
    match values.get(&slot) {
        Some(ValueDataType::BitFlag(flag)) => Ok(*flag),
        Some(_) => Err(ParseError::InvalidContext(slot)),
        None => Err(ParseError::MissingContext(slot)),
    }
}
fn context_get_length(
context: Option<&BTreeMap<usize, ValueDataType>>,
slot: usize,
) -> Result<usize, ParseError> {
match context {
Some(context) => match context.get(&slot) {
Some(context) => match context {
ValueDataType::U4(len) => Ok(*len as usize),
ValueDataType::I4(len) => Ok(*len as usize),
ValueDataType::U8(len) => Ok(*len as usize),
ValueDataType::I8(len) => Ok(*len as usize),
ValueDataType::U16(len) => Ok(*len as usize),
ValueDataType::I16(len) => Ok(*len as usize),
ValueDataType::U32(len) => Ok(*len as usize),
ValueDataType::I32(len) => Ok(*len as usize),
ValueDataType::UDynamic(len) => Ok(*len as usize),
ValueDataType::IDynamic(len) => Ok(*len as usize),
_ => Err(ParseError::InvalidContext(slot)),
},
None => Err(ParseError::MissingContext(slot)),
},
None => Err(ParseError::NoContextAvailable),
}
}

136
src/schema/dyn_int.rs Normal file
View File

@ -0,0 +1,136 @@
use crate::schema::ParseError;
/// Gives encoded size in bytes
///
/// Every encoded byte carries 7 bits of the number. Zero still needs one byte,
/// otherwise it could not be read back from a stream.
///
/// # Arguments
/// * `nr` - number to encode
pub fn encoded_size(nr: u128) -> usize {
    let mut size = 1;
    let mut rest = nr >> 7;
    while rest > 0 {
        rest >>= 7;
        size += 1;
    }
    size
}

/// Encodes a number into a vector of bytes.
///
/// The number is split into 7-bit groups, least significant group first. The
/// high bit of a byte is set when another byte follows. Zero is encoded as a
/// single `0` byte so that every number occupies at least one byte.
///
/// # Arguments
/// * `nr` - number to encode
pub fn encode(nr: u128) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(encoded_size(nr));
    let mut rest = nr;
    loop {
        let group = (rest & 0x7F) as u8;
        rest >>= 7;
        if rest == 0 {
            // Last group: leave the continuation bit clear.
            bytes.push(group);
            break;
        }
        bytes.push(group | 0x80);
    }
    bytes
}
/// Decodes a number from a slice of bytes.
///
/// Each byte contributes its low 7 bits, least significant group first; the
/// high bit of every byte is ignored. Bits that do not fit into a `u128` are
/// dropped instead of overflowing, so over-long input cannot cause a panic.
///
/// # Arguments
/// * `data` - slice of bytes to decode
pub fn decode(data: &[u8]) -> u128 {
    let mut num = 0u128;
    for (byte, shift) in data.iter().zip((0u32..).step_by(7)) {
        if shift >= u128::BITS {
            // Everything from here on lies beyond bit 127.
            break;
        }
        num |= u128::from(byte & 0x7F) << shift;
    }
    num
}
/// Decodes a number from a slice of bytes when size of encoded number is unknown, returning the number and the number of bytes read.
///
/// Reading stops at the first byte whose high (continuation) bit is clear.
///
/// # Arguments
/// * `data` - slice of bytes to decode number from
///
/// # Returns
/// * (number, bytes read)
///
/// # Errors
/// * `ParseError::NotEnoughBytes` - `data` is empty, or every byte announces a
///   follow-up byte that is not there
pub fn read_from_slice(data: &[u8]) -> Result<(u128, usize), ParseError> {
    // Searching for the terminating byte also handles the empty slice, where
    // computing `data.len() - 1` would underflow.
    match data.iter().position(|byte| byte & 0x80 == 0) {
        Some(last) => Ok((decode(&data[..=last]), last + 1)),
        None => Err(ParseError::NotEnoughBytes(data.len() + 1, data.len())),
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// A value survives an encode/decode round trip and uses the expected byte count.
    #[test]
    fn can_encode_decode_number() {
        let original = 1234567890;
        let bytes = encode(original);
        assert_eq!(original, decode(&bytes));
        // 1234567890 needs 31 bits; at 7 bits per byte that is 5 bytes (35 bits).
        assert_eq!(5, bytes.len());
    }

    /// Two bytes decode to the value they carry.
    #[test]
    fn can_decode_number() {
        assert_eq!(600, decode(&[216u8, 4]));
    }

    /// Only the bytes belonging to the first number are consumed.
    #[test]
    fn can_decode_number_from_larger_slice() {
        let input = [216u8, 4, 234, 19, 74];
        let outcome = read_from_slice(&input).unwrap();
        assert_eq!((600, 2), outcome);
    }

    /// The largest 28-bit number still fits into four bytes.
    #[test]
    fn can_decode_number_in_4_bytes() {
        let largest_in_four = 268435455;
        assert_eq!(4, encode(largest_in_four).len());
    }

    /// One past the largest 28-bit number no longer fits into four bytes.
    #[test]
    fn cant_decode_bignr_in_4_bytes() {
        let too_big_for_four = 268435456;
        assert_ne!(4, encode(too_big_for_four).len());
    }

    /// A continuation bit without a following byte is an error.
    #[test]
    fn cant_decode_slice_that_lies() {
        // The high bit announces a second byte that does not exist.
        let truncated = [0b10111110];
        assert!(read_from_slice(&truncated).is_err());
    }

    /// Values below 128 take a single byte.
    #[test]
    fn can_encode_nr_lt_128_in_1_byte() {
        assert_eq!(1, encode(127).len());
    }

    /// `encoded_size` predicts the byte count at the 1, 2 and 4 byte boundaries.
    #[test]
    fn can_guess_encoded_size() {
        assert_eq!(1, encoded_size(127));
        assert_eq!(2, encoded_size(128));
        assert_eq!(4, encoded_size(268435455));
    }
}

136
src/schema/mod.rs Normal file
View File

@ -0,0 +1,136 @@
use core::str;
use std::collections::BTreeMap;
mod deserializer;
mod serializer;
pub mod dyn_int;
/// Describes the binary layout of a value; `parse` turns bytes laid out this
/// way into a `ValueDataType`.
#[derive(Debug)]
pub enum SchemaDataType {
/// Sequence of elements that all follow the boxed schema; the element count
/// comes from the `LengthDetermination`.
Array(Box<SchemaDataType>, LengthDetermination),
/// Unsigned 4-bit integer; may start in the middle of a byte.
U4,
/// Signed 4-bit integer; may start in the middle of a byte.
I4,
/// Unsigned integer stored in one byte.
U8,
/// Signed integer stored in one byte.
I8,
/// Unsigned 16-bit integer, big-endian.
U16,
/// Signed 16-bit integer, big-endian.
I16,
/// Unsigned 32-bit integer, big-endian.
U32,
/// Signed 32-bit integer, big-endian.
I32,
/// Unsigned variable-length integer in the `dyn_int` encoding.
UDynamic,
/// Signed variable-length integer in the `dyn_int` encoding.
IDynamic,
/// Single bit read as a boolean; may start in the middle of a byte.
BitFlag,
/// UTF-8 text whose byte length comes from the `LengthDetermination`.
String(LengthDetermination),
/// Value of the boxed schema that is only parsed when the
/// `OptionalCondition` holds.
Optional(Box<SchemaDataType>, OptionalCondition),
/// Group of fields keyed by slot number, parsed in ascending key order.
Object(BTreeMap<usize, SchemaDataType>),
}
/// A concrete value: the output of `SchemaDataType::parse` and the input of
/// `serialize`.
#[derive(Debug)]
pub enum ValueDataType {
/// Ordered list of values.
Array(Vec<ValueDataType>),
/// Unsigned 4-bit integer; the serializer rejects values above 15.
U4(u8),
/// Signed 4-bit integer; the serializer rejects values outside -8..=7.
I4(i8),
/// Unsigned 8-bit integer.
U8(u8),
/// Signed 8-bit integer.
I8(i8),
/// Unsigned 16-bit integer.
U16(u16),
/// Signed 16-bit integer.
I16(i16),
/// Unsigned 32-bit integer.
U32(u32),
/// Signed 32-bit integer.
I32(i32),
/// Unsigned variable-length integer.
UDynamic(u128),
/// Signed variable-length integer.
IDynamic(i128),
/// Single boolean flag.
BitFlag(bool),
/// Text value.
String(String),
/// Optional value; the parser produces `Optional(None)` when the value is absent.
Optional(Option<Box<ValueDataType>>),
/// Fields keyed by slot number.
Object(BTreeMap<usize, ValueDataType>),
}
/// Decides whether an `Optional` schema entry is present in the data.
#[derive(Debug)]
pub enum OptionalCondition {
/// Present whenever at least one byte of data is left.
IfDataAvailable,
/// Present when the `BitFlag` stored in the given context slot is set.
ByBitFlag(usize),
}
/// Tells the parser how to find the length of an array or string.
#[derive(Debug)]
pub enum LengthDetermination {
/// Use the number of bytes remaining in the data.
AvailableDataLength,
/// Use the numeric value stored in the given context slot.
NumberWithKey(usize),
/// Use this constant length.
Fixed(usize),
}
/// Outcome of a successful parse.
///
/// Fields in order: bytes read, value, new bit start (the bit offset at which
/// the next value begins).
#[derive(Debug)]
pub struct ParseResult(usize, ValueDataType, usize);
/// Outcome of a successful serialization.
///
/// Fields in order: bytes written, bit start (the bit offset at which the next
/// value begins).
#[derive(Debug)]
pub struct SerializeResult(usize, usize);
/// Reasons why parsing can fail.
#[derive(Debug)]
pub enum ParseError {
// required, actual
/// The data is too short; carries the required and the actual byte count.
NotEnoughBytes(usize, usize),
/// The value cannot start at the given bit offset, or would run past the
/// end of the current byte.
InvalidBitStart,
/// A context lookup was needed but no context was supplied.
NoContextAvailable,
/// The context has no entry for the given slot.
MissingContext(usize),
/// The context entry in the given slot has an unsuitable type.
InvalidContext(usize),
/// The bytes could not be interpreted; returned for invalid UTF-8 in strings.
ParsingFailed,
}
/// Reasons why serialization can fail.
#[derive(Debug)]
pub enum SerializerError {
/// The value would run past the end of the current byte when written at the
/// given bit offset.
InvalidBitStart,
/// The value is out of range for its type, e.g. a `U4` above 15.
InvalidValue
}
/// Serializes a mix of nibble and byte values, then parses the bytes back with
/// a matching schema and checks both directions.
#[test]
fn test() {
    let data = ValueDataType::Array(vec![
        ValueDataType::U4(0),
        ValueDataType::I4(-8),
        ValueDataType::U4(15),
        ValueDataType::I4(7),
        ValueDataType::U8(255),
        ValueDataType::I8(-127),
    ]);
    let mut buff = Vec::new();
    let written = data.serialize(&mut buff, 0);
    assert!(
        matches!(written, Ok(SerializeResult(4, 0))),
        "unexpected serialize result: {:?}",
        written
    );
    // Each nibble pair shares a byte: the first value sits in the low four
    // bits, the second in the high four bits.
    assert_eq!(vec![0x80u8, 0x7F, 0xFF, 0x81], buff);

    let schema = SchemaDataType::Object(BTreeMap::from([
        (1, SchemaDataType::U4),
        (2, SchemaDataType::I4),
        (3, SchemaDataType::U4),
        (4, SchemaDataType::I4),
        (5, SchemaDataType::U8),
        (6, SchemaDataType::I8),
    ]));
    let fields = match schema.parse(&buff, 0, None) {
        Ok(ParseResult(4, ValueDataType::Object(fields), 0)) => fields,
        other => panic!("unexpected parse result: {:?}", other),
    };
    assert_eq!(6, fields.len());
    assert!(matches!(fields.get(&1), Some(ValueDataType::U4(0))));
    assert!(matches!(fields.get(&2), Some(ValueDataType::I4(-8))));
    assert!(matches!(fields.get(&3), Some(ValueDataType::U4(15))));
    assert!(matches!(fields.get(&4), Some(ValueDataType::I4(7))));
    assert!(matches!(fields.get(&5), Some(ValueDataType::U8(255))));
    assert!(matches!(fields.get(&6), Some(ValueDataType::I8(-127))));
    // TODO: add coverage for schemas that mix `BitFlag` entries with nibbles.
}

107
src/schema/serializer.rs Normal file
View File

@ -0,0 +1,107 @@
use super::{dyn_int, SerializeResult, SerializerError, ValueDataType};
impl ValueDataType {
pub fn serialize(&self, buffer: &mut Vec<u8>, bit_start: usize) -> Result<SerializeResult, SerializerError> {
let mut len: usize = buffer.len();
match self {
ValueDataType::Array(arr) => {
let mut bit_start = bit_start;
let mut bytes_written = 0;
for item in arr {
let res = item.serialize(buffer, bit_start)?;
bytes_written += res.0;
bit_start = res.1;
}
Ok(SerializeResult(bytes_written, bit_start))
},
ValueDataType::U4(v) => {
let (next_bit_start, bytes_written) = next_bitstart_and_byte_written(bit_start, 4)?;
if *v > 0xF {
return Err(SerializerError::InvalidValue);
}
if bit_start == 0 {
buffer.push(0); // Add an empty byte to write to
len += 1;
}
let mask = 0xF << bit_start;
buffer[len - 1] &= !mask; // Clear the bits where we are writing
buffer[len - 1] |= (*v << bit_start) & mask;
Ok(SerializeResult(bytes_written, next_bit_start))
},
ValueDataType::I4(v) => {
let (next_bit_start, bytes_written) = next_bitstart_and_byte_written(bit_start, 4)?;
if *v < -8 || *v > 7 {
return Err(SerializerError::InvalidValue); // Only values -8 to 7 are valid for 4 bits
}
if bit_start == 0 {
buffer.push(0); // Add an empty byte to write to
len += 1;
}
// Write the value to the correct position
let mask = 0xF << bit_start;
buffer[len - 1] &= !mask; // Clear the bits where we are writing
buffer[len - 1] |= ((*v as u8) << bit_start) & mask;
Ok(SerializeResult(bytes_written, next_bit_start))
},
ValueDataType::U8(v) => {
buffer.push(*v);
Ok(SerializeResult(1, bit_start))
},
ValueDataType::I8(v) => {
buffer.push(*v as u8);
Ok(SerializeResult(1, bit_start))
},
ValueDataType::U16(v) => {
buffer.append(&mut v.to_be_bytes().to_vec());
Ok(SerializeResult(2, bit_start))
},
ValueDataType::I16(v) => {
buffer.append(&mut v.to_be_bytes().to_vec());
Ok(SerializeResult(2, bit_start))
},
ValueDataType::U32(v) => {
buffer.append(&mut v.to_be_bytes().to_vec());
Ok(SerializeResult(4, bit_start))
},
ValueDataType::I32(v) => {
buffer.append(&mut v.to_be_bytes().to_vec());
Ok(SerializeResult(4, bit_start))
},
ValueDataType::UDynamic(v) => {
let mut encoded = dyn_int::encode(*v);
buffer.append(&mut encoded);
Ok(SerializeResult(encoded.len(), bit_start))
},
ValueDataType::IDynamic(v) => {
let mut encoded = dyn_int::encode(*v as u128);
buffer.append(&mut encoded);
Ok(SerializeResult(encoded.len(), bit_start))
},
ValueDataType::BitFlag(_) => todo!(),
ValueDataType::String(_) => todo!(),
ValueDataType::Optional(value_data_type) => todo!(),
ValueDataType::Object(btree_map) => todo!(),
}
}
}
/// Advances the bit cursor after writing `bits_written` bits starting at `bit_start`.
///
/// Returns the bit offset for the next write and the number of bytes this
/// write completed (0 or 1). A write that would run past the end of the byte
/// is rejected with `SerializerError::InvalidBitStart`.
fn next_bitstart_and_byte_written(
    bit_start: usize,
    bits_written: usize,
) -> Result<(usize, usize), SerializerError> {
    match bit_start + bits_written {
        // Exactly at the byte boundary: wrap to bit 0 and count the finished byte.
        8 => Ok((0, 1)),
        end if end < 8 => Ok((end, 0)),
        _ => Err(SerializerError::InvalidBitStart),
    }
}