From deb12ed03758fd0431920f16fedca3aed47afa19 Mon Sep 17 00:00:00 2001
From: Maurice
Date: Thu, 13 Feb 2025 22:41:18 +0100
Subject: [PATCH] first commit

---
 .gitignore                 |   1 +
 Cargo.lock                 |  16 +++
 Cargo.toml                 |   7 ++
 src/lib.rs                 |   1 +
 src/schema/deserializer.rs | 272 +++++++++++++++++++++++++++++++++++++
 src/schema/dyn_int.rs      | 154 +++++++++++++++++++++
 src/schema/mod.rs          | 132 ++++++++++++++++++
 src/schema/serializer.rs   | 128 +++++++++++++++++
 8 files changed, 711 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/lib.rs
 create mode 100644 src/schema/deserializer.rs
 create mode 100644 src/schema/dyn_int.rs
 create mode 100644 src/schema/mod.rs
 create mode 100644 src/schema/serializer.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..761bb1e
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,16 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "log"
+version = "0.4.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
+
+[[package]]
+name = "plabble-serializer"
+version = "0.1.0"
+dependencies = [
+ "log",
+]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..ac55e9b
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "plabble-serializer"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+log = "0.4.25"
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..6bde67a
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1 @@
+mod schema;
diff --git a/src/schema/deserializer.rs b/src/schema/deserializer.rs
new file mode 100644
index 0000000..d279c6c
--- /dev/null
+++ b/src/schema/deserializer.rs
@@ -0,0 +1,272 @@
+use std::collections::BTreeMap;
+
+use super::{
+    dyn_int, LengthDetermination, OptionalCondition, ParseError, ParseResult, SchemaDataType, ValueDataType
+};
+
+impl SchemaDataType {
+    /// Parses one value described by this schema from the start of `data`.
+    ///
+    /// # Arguments
+    /// * `data` - bytes to parse; the value starts in `data[0]`
+    /// * `bit_start` - bit offset inside `data[0]` at which the value starts
+    /// * `context` - values parsed earlier in the enclosing container, by key
+    ///
+    /// # Returns
+    /// * `ParseResult(bytes read, value, next bit start)`
+    ///
+    /// # Errors
+    /// * `InvalidBitStart` - byte-aligned type requested mid-byte, or bit field crosses a byte
+    /// * `NotEnoughBytes` - `data` is too short for the requested type
+    /// * `ParsingFailed` - invalid UTF-8, or an open-ended array element that consumes nothing
+    /// * `NoContextAvailable` / `MissingContext` / `InvalidContext` - a referenced key is unusable
+    pub fn parse(
+        &self,
+        data: &[u8],
+        bit_start: usize,
+        context: Option<&BTreeMap<usize, ValueDataType>>,
+    ) -> Result<ParseResult, ParseError> {
+        // Only bit fields and containers (whose children are checked when they
+        // are parsed themselves) may start in the middle of a byte.
+        let bit_addressable = matches!(
+            self,
+            SchemaDataType::U4
+                | SchemaDataType::I4
+                | SchemaDataType::BitFlag
+                | SchemaDataType::Object(_)
+                | SchemaDataType::Array(..)
+                | SchemaDataType::Optional(..)
+        );
+        if bit_start > 0 && !bit_addressable {
+            return Err(ParseError::InvalidBitStart);
+        }
+
+        match self {
+            SchemaDataType::Array(schema_data_type, length_determination) => {
+                // `None` means: keep reading elements until `data` is used up.
+                let count = match length_determination {
+                    LengthDetermination::AvailableDataLength => None,
+                    LengthDetermination::NumberWithKey(nr) => {
+                        Some(context_get_length(context, *nr)?)
+                    }
+                    LengthDetermination::Fixed(len) => Some(*len),
+                };
+
+                let mut context = BTreeMap::new();
+                let mut offset = 0;
+                let mut bit_start = bit_start;
+                let mut key = 0;
+                while count.map_or(offset < data.len(), |count| key < count) {
+                    let parsed =
+                        schema_data_type.parse(&data[offset..], bit_start, Some(&context))?;
+                    // An open-ended array can never finish if an element consumes nothing.
+                    if count.is_none() && parsed.0 == 0 && parsed.2 == bit_start {
+                        return Err(ParseError::ParsingFailed);
+                    }
+                    offset += parsed.0;
+                    context.insert(key, parsed.1);
+                    bit_start = parsed.2;
+                    key += 1;
+                }
+
+                let items = context.into_values().collect();
+                Ok(ParseResult(offset, ValueDataType::Array(items), bit_start))
+            }
+            SchemaDataType::U4 => {
+                let (next_bit_start, byte_done) = next_bitstart_and_byte_read(bit_start, 4)?;
+                let [byte]: [u8; 1] = read_bytes(data)?;
+                let nibble = (byte >> bit_start) & 0xF;
+                Ok(ParseResult(
+                    usize::from(byte_done),
+                    ValueDataType::U4(nibble),
+                    next_bit_start,
+                ))
+            }
+            SchemaDataType::I4 => {
+                let (next_bit_start, byte_done) = next_bitstart_and_byte_read(bit_start, 4)?;
+                let [byte]: [u8; 1] = read_bytes(data)?;
+                let nibble = (byte >> bit_start) & 0xF;
+                // Move the nibble to the top of the byte and shift it back arithmetically
+                // so bit 3 is sign-extended (0x8 -> -8, 0xF -> -1) at any bit offset.
+                let value = ((nibble << 4) as i8) >> 4;
+                Ok(ParseResult(
+                    usize::from(byte_done),
+                    ValueDataType::I4(value),
+                    next_bit_start,
+                ))
+            }
+            SchemaDataType::U8 => {
+                let value = u8::from_be_bytes(read_bytes(data)?);
+                Ok(ParseResult(1, ValueDataType::U8(value), bit_start))
+            }
+            SchemaDataType::I8 => {
+                let value = i8::from_be_bytes(read_bytes(data)?);
+                Ok(ParseResult(1, ValueDataType::I8(value), bit_start))
+            }
+            SchemaDataType::U16 => {
+                let value = u16::from_be_bytes(read_bytes(data)?);
+                Ok(ParseResult(2, ValueDataType::U16(value), bit_start))
+            }
+            SchemaDataType::I16 => {
+                let value = i16::from_be_bytes(read_bytes(data)?);
+                Ok(ParseResult(2, ValueDataType::I16(value), bit_start))
+            }
+            SchemaDataType::U32 => {
+                let value = u32::from_be_bytes(read_bytes(data)?);
+                Ok(ParseResult(4, ValueDataType::U32(value), bit_start))
+            }
+            SchemaDataType::I32 => {
+                let value = i32::from_be_bytes(read_bytes(data)?);
+                Ok(ParseResult(4, ValueDataType::I32(value), bit_start))
+            }
+            SchemaDataType::UDynamic => {
+                let (val, bytes_read): (u128, usize) = dyn_int::read_from_slice(data)?;
+                Ok(ParseResult(
+                    bytes_read,
+                    ValueDataType::UDynamic(val),
+                    bit_start,
+                ))
+            }
+            SchemaDataType::IDynamic => {
+                let (val, bytes_read): (u128, usize) = dyn_int::read_from_slice(data)?;
+                // The cast reinterprets the decoded bits as a signed number.
+                Ok(ParseResult(
+                    bytes_read,
+                    ValueDataType::IDynamic(val as i128),
+                    bit_start,
+                ))
+            }
+            SchemaDataType::BitFlag => {
+                let (next_bit_start, byte_done) = next_bitstart_and_byte_read(bit_start, 1)?;
+                let [byte]: [u8; 1] = read_bytes(data)?;
+                let bit = byte & (1 << bit_start) != 0;
+                Ok(ParseResult(
+                    usize::from(byte_done),
+                    ValueDataType::BitFlag(bit),
+                    next_bit_start,
+                ))
+            }
+            SchemaDataType::String(length_determination) => {
+                let length = match length_determination {
+                    LengthDetermination::AvailableDataLength => data.len(),
+                    LengthDetermination::NumberWithKey(nr) => context_get_length(context, *nr)?,
+                    LengthDetermination::Fixed(len) => *len,
+                };
+
+                let utf8 = data
+                    .get(..length)
+                    .ok_or(ParseError::NotEnoughBytes(length, data.len()))?
+                    .to_vec();
+
+                let text = String::from_utf8(utf8).map_err(|_| ParseError::ParsingFailed)?;
+
+                Ok(ParseResult(length, ValueDataType::String(text), bit_start))
+            }
+            SchemaDataType::Optional(schema_data_type, optional_condition) => {
+                let available = match optional_condition {
+                    OptionalCondition::IfDataAvailable => !data.is_empty(),
+                    OptionalCondition::ByBitFlag(nr) => context_is_flag_set(context, *nr)?,
+                };
+
+                if available {
+                    // Wrap the value so that present and absent results share one variant.
+                    let ParseResult(read, value, next_bit_start) =
+                        schema_data_type.parse(data, bit_start, context)?;
+                    let value = ValueDataType::Optional(Some(Box::new(value)));
+                    Ok(ParseResult(read, value, next_bit_start))
+                } else {
+                    Ok(ParseResult(0, ValueDataType::Optional(None), bit_start))
+                }
+            }
+            SchemaDataType::Object(map) => {
+                let mut context = BTreeMap::new();
+                let mut offset = 0;
+                let mut bit_start = bit_start;
+
+                for (key, schema) in map {
+                    let parsed = schema.parse(&data[offset..], bit_start, Some(&context))?;
+                    offset += parsed.0;
+                    context.insert(*key, parsed.1);
+                    bit_start = parsed.2;
+                }
+
+                Ok(ParseResult(
+                    offset,
+                    ValueDataType::Object(context),
+                    bit_start,
+                ))
+            }
+        }
+    }
+}
+
+/// Reads the first `N` bytes of `data` as a fixed-size array.
+///
+/// # Errors
+/// * `NotEnoughBytes(N, data.len())` when `data` is too short
+fn read_bytes<const N: usize>(data: &[u8]) -> Result<[u8; N], ParseError> {
+    data.get(..N)
+        .and_then(|bytes| bytes.try_into().ok())
+        .ok_or(ParseError::NotEnoughBytes(N, data.len()))
+}
+
+/// Advances a bit cursor by `bits_read` bits inside the current byte.
+///
+/// # Returns
+/// * (next bit start, whether the current byte is now fully consumed)
+///
+/// # Errors
+/// * `InvalidBitStart` when the read would cross the byte boundary
+fn next_bitstart_and_byte_read(
+    bit_start: usize,
+    bits_read: usize,
+) -> Result<(usize, bool), ParseError> {
+    match bit_start.checked_add(bits_read) {
+        Some(next @ 0..=7) => Ok((next, false)),
+        Some(8) => Ok((0, true)),
+        _ => Err(ParseError::InvalidBitStart),
+    }
+}
+
+/// Looks up the bit flag stored under `slot` in `context`.
+///
+/// # Errors
+/// * `NoContextAvailable`, `MissingContext` or `InvalidContext` (value is not a bit flag)
+fn context_is_flag_set(
+    context: Option<&BTreeMap<usize, ValueDataType>>,
+    slot: usize,
+) -> Result<bool, ParseError> {
+    match context.ok_or(ParseError::NoContextAvailable)?.get(&slot) {
+        Some(ValueDataType::BitFlag(value)) => Ok(*value),
+        Some(_) => Err(ParseError::InvalidContext(slot)),
+        None => Err(ParseError::MissingContext(slot)),
+    }
+}
+
+/// Looks up the number stored under `slot` in `context` and uses it as a length.
+///
+/// # Errors
+/// * `NoContextAvailable`, `MissingContext` or `InvalidContext` (value is not a number,
+///   is negative, or does not fit in `usize`)
+fn context_get_length(
+    context: Option<&BTreeMap<usize, ValueDataType>>,
+    slot: usize,
+) -> Result<usize, ParseError> {
+    let value = context
+        .ok_or(ParseError::NoContextAvailable)?
+        .get(&slot)
+        .ok_or(ParseError::MissingContext(slot))?;
+
+    let length = match value {
+        ValueDataType::U4(len) | ValueDataType::U8(len) => Some(usize::from(*len)),
+        ValueDataType::I4(len) | ValueDataType::I8(len) => usize::try_from(*len).ok(),
+        ValueDataType::U16(len) => Some(usize::from(*len)),
+        ValueDataType::I16(len) => usize::try_from(*len).ok(),
+        ValueDataType::U32(len) => usize::try_from(*len).ok(),
+        ValueDataType::I32(len) => usize::try_from(*len).ok(),
+        ValueDataType::UDynamic(len) => usize::try_from(*len).ok(),
+        ValueDataType::IDynamic(len) => usize::try_from(*len).ok(),
+        _ => None,
+    };
+    length.ok_or(ParseError::InvalidContext(slot))
+}
diff --git a/src/schema/dyn_int.rs b/src/schema/dyn_int.rs
new file mode 100644
index 0000000..2513157
--- /dev/null
+++ b/src/schema/dyn_int.rs
@@ -0,0 +1,154 @@
+use crate::schema::ParseError;
+
+/// Gives encoded size in bytes
+///
+/// Every byte carries 7 bits of the number; zero still takes one byte.
+///
+/// # Arguments
+/// * `nr` - number to encode
+pub fn encoded_size(nr: u128) -> usize {
+    let significant_bits = (u128::BITS - nr.leading_zeros()).max(1) as usize;
+    (significant_bits + 6) / 7
+}
+
+/// Encodes a number into a vector of bytes.
+///
+/// The number is written in groups of 7 bits, least significant group first.
+/// The high bit of a byte is set when another byte follows. Zero is encoded
+/// as a single `0` byte so that it can be read back.
+///
+/// # Arguments
+/// * `nr` - number to encode
+pub fn encode(nr: u128) -> Vec<u8> {
+    let mut res = Vec::with_capacity(encoded_size(nr));
+    let mut nr = nr;
+    loop {
+        let group = (nr & 0x7F) as u8;
+        nr >>= 7;
+        if nr == 0 {
+            res.push(group);
+            break res;
+        }
+        res.push(group | 0x80);
+    }
+}
+
+/// Decodes a number from a slice of bytes.
+///
+/// The continuation bit of every byte is ignored. Bits that do not fit in a
+/// `u128` (more than 19 bytes, or excess bits in the 19th byte) are dropped.
+///
+/// # Arguments
+/// * `data` - slice of bytes to decode
+pub fn decode(data: &[u8]) -> u128 {
+    data.iter()
+        .zip((0..u128::BITS).step_by(7))
+        .fold(0, |num, (byte, shift)| num | (u128::from(byte & 0x7F) << shift))
+}
+
+/// Decodes a number from a slice of bytes when size of encoded number is unknown, returning the number and the number of bytes read.
+///
+/// # Arguments
+/// * `data` - slice of bytes to decode number from
+///
+/// # Returns
+/// * (number, bytes read)
+///
+/// # Errors
+/// * `NotEnoughBytes` when `data` is empty or ends before the last byte of the number
+pub fn read_from_slice(data: &[u8]) -> Result<(u128, usize), ParseError> {
+    // The number ends at the first byte whose continuation bit is clear.
+    match data.iter().position(|byte| (byte & 0x80) == 0) {
+        Some(last) => Ok((decode(&data[..=last]), last + 1)),
+        None => Err(ParseError::NotEnoughBytes(data.len() + 1, data.len())),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn can_encode_decode_number() {
+        let number = 1234567890;
+        let encoded = encode(number);
+        let decoded = decode(&encoded);
+        assert_eq!(number, decoded);
+        assert_eq!(5, encoded.len()); // 1234567890 ~ 2^31, 7 bits per byte = 7 * 5 = 35
+    }
+
+    #[test]
+    fn can_encode_decode_zero() {
+        let encoded = encode(0);
+        assert_eq!(vec![0u8], encoded);
+        assert_eq!((0, 1), read_from_slice(&encoded).unwrap());
+    }
+
+    #[test]
+    fn can_encode_decode_largest_number() {
+        let encoded = encode(u128::MAX);
+        assert_eq!(19, encoded.len());
+        assert_eq!((u128::MAX, 19), read_from_slice(&encoded).unwrap());
+    }
+
+    #[test]
+    fn can_decode_number() {
+        let nr = &[216u8, 4];
+        let res = decode(nr);
+        assert_eq!(600, res);
+    }
+
+    #[test]
+    fn can_decode_number_from_larger_slice() {
+        let nr = &[216u8, 4, 234, 19, 74];
+        let res = read_from_slice(nr).unwrap();
+        assert_eq!((600, 2), res);
+    }
+
+    #[test]
+    fn can_decode_number_in_4_bytes() {
+        let max_nr = 268435455; // max number in 4 bytes
+        let encoded = encode(max_nr);
+        assert_eq!(4, encoded.len());
+    }
+
+    #[test]
+    fn cant_decode_bignr_in_4_bytes() {
+        let max_nr = 268435456;
+        let encoded = encode(max_nr);
+        assert_ne!(4, encoded.len());
+    }
+
+    #[test]
+    fn cant_decode_slice_that_lies() {
+        let slice = &[0b10111110]; // slice notes there is a second byte (7th bit, right-to-left), but there's not
+        let decoded = read_from_slice(slice);
+        assert!(decoded.is_err());
+    }
+
+    #[test]
+    fn cant_decode_empty_slice() {
+        assert!(read_from_slice(&[]).is_err());
+    }
+
+    #[test]
+    fn can_encode_nr_lt_128_in_1_byte() {
+        let encoded = encode(127);
+        assert_eq!(1, encoded.len());
+    }
+
+    #[test]
+    fn can_guess_encoded_size() {
+        let zero = 0;
+        assert_eq!(1, encoded_size(zero));
+
+        let one_byte = 127;
+        assert_eq!(1, encoded_size(one_byte));
+
+        let two_bytes = 128;
+        assert_eq!(2, encoded_size(two_bytes));
+
+        let four_bytes = 268435455;
+        assert_eq!(4, encoded_size(four_bytes));
+    }
+}
diff --git a/src/schema/mod.rs b/src/schema/mod.rs
new file mode 100644
index 0000000..053d4ca
--- /dev/null
+++ b/src/schema/mod.rs
@@ -0,0 +1,132 @@
+use core::str;
+use std::collections::BTreeMap;
+
+mod deserializer;
+mod serializer;
+pub mod dyn_int;
+
+/// Describes how a value is laid out in binary data.
+#[derive(Debug)]
+pub enum SchemaDataType {
+    Array(Box<SchemaDataType>, LengthDetermination),
+    U4,
+    I4,
+    U8,
+    I8,
+    U16,
+    I16,
+    U32,
+    I32,
+    UDynamic,
+    IDynamic,
+    BitFlag,
+    String(LengthDetermination),
+    Optional(Box<SchemaDataType>, OptionalCondition),
+    Object(BTreeMap<usize, SchemaDataType>),
+}
+
+/// A parsed value, or a value that is ready to be serialized.
+#[derive(Debug, PartialEq)]
+pub enum ValueDataType {
+    Array(Vec<ValueDataType>),
+    U4(u8),
+    I4(i8),
+    U8(u8),
+    I8(i8),
+    U16(u16),
+    I16(i16),
+    U32(u32),
+    I32(i32),
+    UDynamic(u128),
+    IDynamic(i128),
+    BitFlag(bool),
+    String(String),
+    Optional(Option<Box<ValueDataType>>),
+    Object(BTreeMap<usize, ValueDataType>),
+}
+
+/// Decides whether an optional value is present.
+#[derive(Debug)]
+pub enum OptionalCondition {
+    /// present when there is data left to read
+    IfDataAvailable,
+    /// present when the bit flag stored under this key is set
+    ByBitFlag(usize),
+}
+
+/// Decides how long a string or array is.
+#[derive(Debug)]
+pub enum LengthDetermination {
+    /// uses all data that is left
+    AvailableDataLength,
+    /// uses the number stored under this key
+    NumberWithKey(usize),
+    /// uses a fixed length
+    Fixed(usize),
+}
+
+// bytes read, value, new bit start
+#[derive(Debug, PartialEq)]
+pub struct ParseResult(usize, ValueDataType, usize);
+
+// bytes written, bit start
+#[derive(Debug, PartialEq)]
+pub struct SerializeResult(usize, usize);
+
+#[derive(Debug, PartialEq)]
+pub enum ParseError {
+    // required, actual
+    NotEnoughBytes(usize, usize),
+    InvalidBitStart,
+    NoContextAvailable,
+    MissingContext(usize),
+    InvalidContext(usize),
+    ParsingFailed,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum SerializerError {
+    InvalidBitStart,
+    InvalidValue,
+}
+
+#[test]
+fn serialized_values_can_be_parsed_back() {
+    let data = ValueDataType::Array(vec![
+        ValueDataType::U4(0),
+        ValueDataType::I4(-8),
+        ValueDataType::U4(15),
+        ValueDataType::I4(7),
+        ValueDataType::U8(255),
+        ValueDataType::I8(-127),
+    ]);
+
+    let mut buff = Vec::new();
+    let written = data.serialize(&mut buff, 0).unwrap();
+
+    // the first value of every nibble pair lands in the low half of the byte
+    assert_eq!(vec![0x80u8, 0x7F, 0xFF, 0x81], buff);
+    assert_eq!(SerializeResult(4, 0), written);
+
+    let schema = SchemaDataType::Object(BTreeMap::from([
+        (1, SchemaDataType::U4),
+        (2, SchemaDataType::I4),
+        (3, SchemaDataType::U4),
+        (4, SchemaDataType::I4),
+        (5, SchemaDataType::U8),
+        (6, SchemaDataType::I8),
+    ]));
+
+    // given bytes and schema should produce filled schema
+    let ParseResult(bytes_read, value, bit_start) = schema.parse(&buff, 0, None).unwrap();
+    let expected = ValueDataType::Object(BTreeMap::from([
+        (1, ValueDataType::U4(0)),
+        (2, ValueDataType::I4(-8)),
+        (3, ValueDataType::U4(15)),
+        (4, ValueDataType::I4(7)),
+        (5, ValueDataType::U8(255)),
+        (6, ValueDataType::I8(-127)),
+    ]));
+    assert_eq!(expected, value);
+    assert_eq!((4, 0), (bytes_read, bit_start));
+}
diff --git a/src/schema/serializer.rs b/src/schema/serializer.rs
new file mode 100644
index 0000000..e9f91e0
--- /dev/null
+++ b/src/schema/serializer.rs
@@ -0,0 +1,128 @@
+use super::{dyn_int, SerializeResult, SerializerError, ValueDataType};
+
+impl ValueDataType {
+    /// Appends the binary form of this value to `buffer`.
+    ///
+    /// # Arguments
+    /// * `buffer` - output buffer; a bit field that starts mid-byte is written into its last byte
+    /// * `bit_start` - bit offset inside the last byte of `buffer` at which writing continues
+    ///
+    /// # Returns
+    /// * `SerializeResult(bytes written, next bit start)`; a byte shared by several
+    ///   bit fields is counted once, when its last bit has been written
+    ///
+    /// # Errors
+    /// * `InvalidBitStart` - a bit field would cross a byte boundary, a byte-aligned value
+    ///   is written mid-byte, or `bit_start > 0` while `buffer` is empty
+    /// * `InvalidValue` - the value does not fit in 4 bits (`U4` / `I4`)
+    pub fn serialize(&self, buffer: &mut Vec<u8>, bit_start: usize) -> Result<SerializeResult, SerializerError> {
+        match self {
+            ValueDataType::Array(arr) => serialize_all(arr, buffer, bit_start),
+            ValueDataType::U4(v) => {
+                let (next_bit_start, bytes_written) = next_bitstart_and_byte_written(bit_start, 4)?;
+                if *v > 0xF {
+                    return Err(SerializerError::InvalidValue);
+                }
+
+                write_bits(buffer, bit_start, 0xF, *v)?;
+                Ok(SerializeResult(bytes_written, next_bit_start))
+            },
+            ValueDataType::I4(v) => {
+                let (next_bit_start, bytes_written) = next_bitstart_and_byte_written(bit_start, 4)?;
+                if !(-8..=7).contains(v) {
+                    return Err(SerializerError::InvalidValue); // Only values -8 to 7 are valid for 4 bits
+                }
+
+                // The low nibble of the two's-complement byte is the 4-bit value.
+                write_bits(buffer, bit_start, 0xF, *v as u8)?;
+                Ok(SerializeResult(bytes_written, next_bit_start))
+            },
+            ValueDataType::U8(v) => write_bytes(buffer, bit_start, &[*v]),
+            ValueDataType::I8(v) => write_bytes(buffer, bit_start, &v.to_be_bytes()),
+            ValueDataType::U16(v) => write_bytes(buffer, bit_start, &v.to_be_bytes()),
+            ValueDataType::I16(v) => write_bytes(buffer, bit_start, &v.to_be_bytes()),
+            ValueDataType::U32(v) => write_bytes(buffer, bit_start, &v.to_be_bytes()),
+            ValueDataType::I32(v) => write_bytes(buffer, bit_start, &v.to_be_bytes()),
+            ValueDataType::UDynamic(v) => write_bytes(buffer, bit_start, &dyn_int::encode(*v)),
+            ValueDataType::IDynamic(v) => write_bytes(buffer, bit_start, &dyn_int::encode(*v as u128)),
+            ValueDataType::BitFlag(v) => {
+                let (next_bit_start, bytes_written) = next_bitstart_and_byte_written(bit_start, 1)?;
+                write_bits(buffer, bit_start, 0x1, u8::from(*v))?;
+                Ok(SerializeResult(bytes_written, next_bit_start))
+            },
+            ValueDataType::String(v) => write_bytes(buffer, bit_start, v.as_bytes()),
+            ValueDataType::Optional(None) => Ok(SerializeResult(0, bit_start)),
+            ValueDataType::Optional(Some(value)) => value.serialize(buffer, bit_start),
+            ValueDataType::Object(map) => serialize_all(map.values(), buffer, bit_start),
+        }
+    }
+}
+
+/// Serializes `values` one after another, carrying the bit position along.
+fn serialize_all<'a>(
+    values: impl IntoIterator<Item = &'a ValueDataType>,
+    buffer: &mut Vec<u8>,
+    bit_start: usize,
+) -> Result<SerializeResult, SerializerError> {
+    let mut bit_start = bit_start;
+    let mut bytes_written = 0;
+    for value in values {
+        let SerializeResult(written, next_bit_start) = value.serialize(buffer, bit_start)?;
+        bytes_written += written;
+        bit_start = next_bit_start;
+    }
+
+    Ok(SerializeResult(bytes_written, bit_start))
+}
+
+/// Appends byte-aligned data; fails with `InvalidBitStart` in the middle of a byte.
+fn write_bytes(
+    buffer: &mut Vec<u8>,
+    bit_start: usize,
+    bytes: &[u8],
+) -> Result<SerializeResult, SerializerError> {
+    if bit_start > 0 {
+        return Err(SerializerError::InvalidBitStart);
+    }
+
+    buffer.extend_from_slice(bytes);
+    Ok(SerializeResult(bytes.len(), bit_start))
+}
+
+/// Writes the bits of `value` selected by `mask` into the current byte at `bit_start`.
+///
+/// A new byte is started when `bit_start` is 0; otherwise the last byte of `buffer` is used.
+fn write_bits(
+    buffer: &mut Vec<u8>,
+    bit_start: usize,
+    mask: u8,
+    value: u8,
+) -> Result<(), SerializerError> {
+    if bit_start == 0 {
+        buffer.push(0); // Add an empty byte to write to
+    }
+
+    let target = buffer.last_mut().ok_or(SerializerError::InvalidBitStart)?;
+    let mask = mask << bit_start;
+    *target &= !mask; // Clear the bits where we are writing
+    *target |= (value << bit_start) & mask;
+    Ok(())
+}
+
+/// Advances a bit cursor by `bits_written` bits inside the current byte.
+///
+/// # Returns
+/// * (next bit start, bytes completed: 1 when the byte is now full, otherwise 0)
+///
+/// # Errors
+/// * `InvalidBitStart` when the write would cross the byte boundary
+fn next_bitstart_and_byte_written(
+    bit_start: usize,
+    bits_written: usize,
+) -> Result<(usize, usize), SerializerError> {
+    match bit_start.checked_add(bits_written) {
+        Some(next @ 0..=7) => Ok((next, 0)),
+        Some(8) => Ok((0, 1)),
+        _ => Err(SerializerError::InvalidBitStart),
+    }
+}