Skip to content

Commit

Permalink
feat: change node encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
Nuhvi committed Dec 29, 2023
1 parent 8ac68bd commit 82a5091
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 51 deletions.
161 changes: 118 additions & 43 deletions mast/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ use redb::{ReadableTable, Table};

use crate::{Hash, Hasher, HASH_LEN};

// TODO: room for improvement (pending actual benchmarks to justify):
// - cache encoding

// TODO: remove unwrap
// TODO: KeyType and ValueType

Expand Down Expand Up @@ -182,20 +179,57 @@ impl Node {
self
}

/// Encodes the node in a canonical way:
/// - 1 byte header
/// - 0b1100_0000: Two reserved bits (this encoder leaves them zero)
/// - 0b0011_0000: Two bits giving the width of the key length field (0 = absent, 1 = u8, 2 = u16, 3 = u32)
/// - 0b0000_1100: Two bits giving the width of the value length field (same codes as the key)
/// - 0b0000_0010: left child is present
/// - 0b0000_0001: right child is present
/// - key length, big-endian, omitted when the key is empty
/// - value length, big-endian, omitted when the value is empty
/// - key
/// - value
/// - left child hash, only when the left child is present
/// - right child hash, only when the right child is present
fn canonical_encode(&self) -> Vec<u8> {
// NOTE(review): this listing comes from a commit diff view, so statements from the
// previous encoding (the empty `vec![]`, the `encode(..)` calls and the `left`/`right`
// byte vectors) appear next to their replacements — confirm against the checked-out file.
let mut bytes = vec![];
let key_length = self.key.len();
let val_length = self.value.len();

// Width codes (0..=3) for the two length fields; see `len_encoding_length`.
let key_length_encoding_length = len_encoding_length(key_length);
let val_length_encoding_length = len_encoding_length(val_length);

// Pack both width codes and both child-presence flags into the single header byte.
let header = 0_u8
| (key_length_encoding_length << 4)
| (val_length_encoding_length << 2)
| ((self.left.is_some() as u8) << 1)
| (self.right.is_some() as u8);

let mut bytes = vec![header];

// Encode key length
// The `as u8` / `as u16` casts cannot truncate because the width code was derived from
// the same length. Code 3 is also returned for lengths above u32::MAX, which `as u32`
// would silently truncate.
match key_length_encoding_length {
1 => bytes.push(key_length as u8),
2 => bytes.extend_from_slice(&(key_length as u16).to_be_bytes()),
3 => bytes.extend_from_slice(&(key_length as u32).to_be_bytes()),
_ => {} // Do nothing for 0 length
}

encode(&self.key, &mut bytes);
encode(&self.value, &mut bytes);
// Encode value length
// Same width rules and truncation caveat as the key length above.
match val_length_encoding_length {
1 => bytes.push(val_length as u8),
2 => bytes.extend_from_slice(&(val_length as u16).to_be_bytes()),
3 => bytes.extend_from_slice(&(val_length as u32).to_be_bytes()),
_ => {} // Do nothing for 0 length
}

let left = &self.left.map(|h| h.as_bytes().to_vec()).unwrap_or_default();
let right = &self
.right
.map(|h| h.as_bytes().to_vec())
.unwrap_or_default();
// Raw key and value bytes follow the length fields, with no separator.
bytes.extend_from_slice(&self.key);
bytes.extend_from_slice(&self.value);

encode(left, &mut bytes);
encode(right, &mut bytes);
// Child hashes are appended only when present; the header flags tell a reader which exist.
if let Some(left) = &self.left {
// NOTE(review): the header was already built with this bit set, so this OR does not change it.
bytes[0] |= 0b0000_0010;
bytes.extend_from_slice(left.as_bytes());
}
if let Some(right) = &self.right {
// NOTE(review): the header was already built with this bit set, so this OR does not change it.
bytes[0] |= 0b0000_0001;
bytes.extend_from_slice(right.as_bytes());
}

bytes
}
Expand All @@ -208,18 +242,7 @@ fn hash(bytes: &[u8]) -> Hash {
hasher.finalize()
}

/// Appends `bytes` to `out`, preceded by its length written as a varu64 prefix.
fn encode(bytes: &[u8], out: &mut Vec<u8>) {
    let payload_len = bytes.len() as u64;
    let prefix_start = out.len();

    // TODO: find a better way to reserve bytes.
    // Zero-fill the slots the length prefix will occupy so that `varu64::encode`
    // has a slice to write into.
    (0..varu64::encoding_length(payload_len)).for_each(|_| out.push(0));
    varu64::encode(payload_len, &mut out[prefix_start..]);

    out.extend_from_slice(bytes);
}

fn decode(bytes: &[u8]) -> (&[u8], &[u8]) {
fn varu64_decode(bytes: &[u8]) -> (&[u8], &[u8]) {
let (len, remaining) = varu64::decode(bytes).unwrap();
let value = &remaining[..len as usize];
let rest = &remaining[value.len()..];
Expand All @@ -230,30 +253,70 @@ fn decode(bytes: &[u8]) -> (&[u8], &[u8]) {
fn decode_node(data: (u64, &[u8])) -> Node {
let (ref_count, encoded_node) = data;

let (key, rest) = decode(encoded_node);
let (value, rest) = decode(rest);
// We can calculate the size of the node from the first few bytes.
let header = encoded_node[0];

let mut rest = &encoded_node[1..];

let key_length = match (header & 0b0011_0000) >> 4 {
1 => {
let len = rest[0] as usize;
rest = &rest[1..];
len
}
2 => {
// A u16 length prefix is exactly two bytes wide: the encoder writes
// `(key_length as u16).to_be_bytes()`. Taking a three-byte slice made the
// conversion into `[u8; 2]` fail, so every key of 256..=65535 bytes panicked
// here, and the cursor was advanced one byte too far.
let len = u16::from_be_bytes(rest[0..2].try_into().unwrap()) as usize;
rest = &rest[2..];
len
}
3 => {
let len = u32::from_be_bytes(rest[0..4].try_into().unwrap()) as usize;
rest = &rest[4..];
len
}
_ => 0,
};

let (left, rest) = decode(rest);
let left = match left.len() {
0 => None,
32 => {
let bytes: [u8; HASH_LEN] = left.try_into().unwrap();
Some(Hash::from_bytes(bytes))
let val_length = match (header & 0b0000_1100) >> 2 {
1 => {
let len = rest[0] as usize;
rest = &rest[1..];
len
}
_ => {
panic!("invalid hash length!")
2 => {
// A u16 length prefix is exactly two bytes wide: the encoder writes
// `(val_length as u16).to_be_bytes()`. Taking a three-byte slice made the
// conversion into `[u8; 2]` fail, so every value of 256..=65535 bytes panicked
// here, and the cursor was advanced one byte too far.
let len = u16::from_be_bytes(rest[0..2].try_into().unwrap()) as usize;
rest = &rest[2..];
len
}
3 => {
let len = u32::from_be_bytes(rest[0..4].try_into().unwrap()) as usize;
rest = &rest[4..];
len
}
_ => 0,
};

let (right, _) = decode(rest);
let right = match right.len() {
0 => None,
32 => {
let bytes: [u8; HASH_LEN] = right.try_into().unwrap();
Some(Hash::from_bytes(bytes))
let key = &rest[..key_length];
rest = &rest[key_length..];

let value = &rest[..val_length];
rest = &rest[val_length..];

let left = match header & 0b0000_0010 == 0 {
true => None,
false => {
let hash_bytes: [u8; HASH_LEN] = rest[0..32].try_into().unwrap();
rest = &rest[32..];

Some(Hash::from_bytes(hash_bytes))
}
_ => {
panic!("invalid hash length!")
};

let right = match header & 0b0000_0001 == 0 {
true => None,
false => {
let hash_bytes: [u8; HASH_LEN] = rest[0..32].try_into().unwrap();
Some(Hash::from_bytes(hash_bytes))
}
};

Expand All @@ -269,3 +332,15 @@ fn decode_node(data: (u64, &[u8])) -> Node {
hash: None,
}
}

/// Picks the 2-bit width code stored in the node header for a length field.
///
/// Returns `0` when `len` is zero (no length field is written), `1` when `len`
/// fits in a `u8`, `2` when it fits in a `u16`, and `3` for anything larger.
fn len_encoding_length(len: usize) -> u8 {
    const ONE_BYTE_MAX: usize = u8::MAX as usize;
    const TWO_BYTE_MAX: usize = u16::MAX as usize;

    match len {
        0 => 0,
        1..=ONE_BYTE_MAX => 1,
        n if n <= TWO_BYTE_MAX => 2,
        _ => 3,
    }
}
14 changes: 7 additions & 7 deletions mast/src/operations/insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ mod test {

test_operations(
&case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
Some("78fd7507ef338f1a5816ffd702394999680a9694a85f4b8af77795d9fdd5854d"),
Some("9fbdb0a2023f8029871b44722b2091a45b8209eaa5ce912740959fc00c611b91"),
)
}

Expand All @@ -233,7 +233,7 @@ mod test {

test_operations(
&case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"),
Some("26820b21fec1451a2478808bb8bc3ade05dcfbcd50d9556cca77d12d6239f4a7"),
);
}

Expand All @@ -247,7 +247,7 @@ mod test {

test_operations(
&case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"),
Some("26820b21fec1451a2478808bb8bc3ade05dcfbcd50d9556cca77d12d6239f4a7"),
)
}

Expand All @@ -257,7 +257,7 @@ mod test {

test_operations(
&case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
Some("0957cc9b87c11cef6d88a95328cfd9043a3d6a99e9ba35ee5c9c47e53fb6d42b"),
Some("96c3cff677fb331fe2901a6b5297395f089a38af9ab4ad310d362f557d60fca5"),
)
}

Expand All @@ -272,7 +272,7 @@ mod test {
i += 1;
Entry::insert(key.as_bytes(), i.to_string().as_bytes())
}),
Some("4538b4de5e58f9be9d54541e69fab8c94c31553a1dec579227ef9b572d1c1dff"),
Some("69e8b408d10174feb9d9befd0a3de95767cc0e342d0dba5f51139f4b49588fb7"),
)
}

Expand All @@ -288,7 +288,7 @@ mod test {
i += 1;
Entry::insert(key.as_bytes(), i.to_string().as_bytes())
}),
Some("c9f7aaefb18ec8569322b9621fc64f430a7389a790e0bf69ec0ad02879d6ce54"),
Some("9e73a80068adf0fb31382eb35d489aa9b50f91a3ad8e55523d5cca6d6247b15b"),
)
}

Expand All @@ -304,7 +304,7 @@ mod test {
i += 1;
Entry::insert(key.as_bytes(), i.to_string().as_bytes())
}),
Some("02e26311f2b55bf6d4a7163399f99e17c975891a05af2f1e09bc969f8bf0f95d"),
Some("8c3cb6bb83df437b73183692e4b1b3809afd6974aec49d67b1ce3266e909cb67"),
)
}
}
2 changes: 1 addition & 1 deletion mast/src/operations/remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ mod test {

test_operations(
&case,
Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"),
Some("26820b21fec1451a2478808bb8bc3ade05dcfbcd50d9556cca77d12d6239f4a7"),
);
}
}
1 change: 1 addition & 0 deletions mast/src/treap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ impl<'treap> HashTreap<'treap> {

pub fn insert(&mut self, key: &[u8], value: &[u8]) {
// TODO: validate key and value length.
// key and value must each be less than 2^32 bytes.

let write_txn = self.db.begin_write().unwrap();

Expand Down

0 comments on commit 82a5091

Please sign in to comment.