From 396242bc75a914826782ad240a6b0a4b7d33d507 Mon Sep 17 00:00:00 2001 From: nazeh Date: Sat, 23 Dec 2023 23:20:52 +0300 Subject: [PATCH] feat: remove working --- mast/src/operations/insert.rs | 248 ++++++++++++++++----------------- mast/src/operations/remove.rs | 255 ++++++++++++++++++++-------------- mast/src/test.rs | 2 +- mast/src/treap.rs | 10 +- 4 files changed, 280 insertions(+), 235 deletions(-) diff --git a/mast/src/operations/insert.rs b/mast/src/operations/insert.rs index f3047c3..cf012b6 100644 --- a/mast/src/operations/insert.rs +++ b/mast/src/operations/insert.rs @@ -183,128 +183,128 @@ pub(crate) fn insert( #[cfg(test)] mod test { - // use crate::test::{test_operations, Entry}; - // use proptest::prelude::*; - // - // proptest! { - // #[test] - // /// Test that upserting an entry with the same key in different tree shapes results in the - // /// expected structure - // fn test_upsert(random_entries in prop::collection::vec( - // (prop::collection::vec(any::(), 1), prop::collection::vec(any::(), 1)), - // 1..10, - // )) { - // let operations = random_entries.into_iter().map(|(key, value)| { - // Entry::insert(&key, &value) - // }).collect::>(); - // - // test_operations(&operations, None); - // } - // - // #[test] - // fn test_general_insertiong(random_entries in prop::collection::vec( - // (prop::collection::vec(any::(), 32), prop::collection::vec(any::(), 32)), - // 1..50, - // )) { - // let operations = random_entries.into_iter().map(|(key, value)| { - // Entry::insert(&key, &value) - // }).collect::>(); - // - // test_operations(&operations, None); - // } - // } - // - // #[test] - // fn insert_single_entry() { - // let case = ["A"]; - // - // test_operations( - // &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), - // Some("78fd7507ef338f1a5816ffd702394999680a9694a85f4b8af77795d9fdd5854d"), - // ) - // } - // - // #[test] - // fn sorted_alphabets() { - // let case = [ - // "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", - // "R", "S", "T", "U", "V", "W", "X", "Y", "Z", - // ]; - // - // test_operations( - // &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), - // Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"), - // ); - // } - // - // #[test] - // fn reverse_alphabets() { - // let mut case = [ - // "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", - // "R", "S", "T", "U", "V", "W", "X", "Y", "Z", - // ]; - // case.reverse(); - // - // test_operations( - // &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), - // Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"), - // ) - // } - // - // #[test] - // fn unsorted() { - // let case = ["D", "N", "P", "X", "A", "G", "C", "M", "H", "I", "J"]; - // - // test_operations( - // &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), - // Some("0957cc9b87c11cef6d88a95328cfd9043a3d6a99e9ba35ee5c9c47e53fb6d42b"), - // ) - // } - // - // #[test] - // fn upsert_at_root() { - // let case = ["X", "X"]; - // - // let mut i = 0; - // - // test_operations( - // &case.map(|key| { - // i += 1; - // Entry::insert(key.as_bytes(), i.to_string().as_bytes()) - // }), - // Some("4538b4de5e58f9be9d54541e69fab8c94c31553a1dec579227ef9b572d1c1dff"), - // ) - // } - // - // #[test] - // fn upsert_deeper() { - // // X has higher rank. - // let case = ["X", "F", "F"]; - // - // let mut i = 0; - // - // test_operations( - // &case.map(|key| { - // i += 1; - // Entry::insert(key.as_bytes(), i.to_string().as_bytes()) - // }), - // Some("c9f7aaefb18ec8569322b9621fc64f430a7389a790e0bf69ec0ad02879d6ce54"), - // ) - // } - // - // #[test] - // fn upsert_root_with_children() { - // // X has higher rank. - // let case = ["F", "X", "X"]; - // - // let mut i = 0; - // - // test_operations( - // &case.map(|key| { - // i += 1; - // Entry::insert(key.as_bytes(), i.to_string().as_bytes()) - // }), - // Some("02e26311f2b55bf6d4a7163399f99e17c975891a05af2f1e09bc969f8bf0f95d"), - // ) - // } + use crate::test::{test_operations, Entry}; + use proptest::prelude::*; + + proptest! { + #[test] + /// Test that upserting an entry with the same key in different tree shapes results in the + /// expected structure + fn test_upsert(random_entries in prop::collection::vec( + (prop::collection::vec(any::(), 1), prop::collection::vec(any::(), 1)), + 1..10, + )) { + let operations = random_entries.into_iter().map(|(key, value)| { + Entry::insert(&key, &value) + }).collect::>(); + + test_operations(&operations, None); + } + + #[test] + fn test_general_insertiong(random_entries in prop::collection::vec( + (prop::collection::vec(any::(), 32), prop::collection::vec(any::(), 32)), + 1..50, + )) { + let operations = random_entries.into_iter().map(|(key, value)| { + Entry::insert(&key, &value) + }).collect::>(); + + test_operations(&operations, None); + } + } + + #[test] + fn insert_single_entry() { + let case = ["A"]; + + test_operations( + &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), + Some("78fd7507ef338f1a5816ffd702394999680a9694a85f4b8af77795d9fdd5854d"), + ) + } + + #[test] + fn sorted_alphabets() { + let case = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", + "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + ]; + + test_operations( + &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), + Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"), + ); + } + + #[test] + fn reverse_alphabets() { + let mut case = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", + "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + ]; + case.reverse(); + + test_operations( + &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), + Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"), + ) + } + + #[test] + fn unsorted() { + let case = ["D", "N", "P", "X", "A", "G", "C", "M", "H", "I", "J"]; + + test_operations( + &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())), + Some("0957cc9b87c11cef6d88a95328cfd9043a3d6a99e9ba35ee5c9c47e53fb6d42b"), + ) + } + + #[test] + fn upsert_at_root() { + let case = ["X", "X"]; + + let mut i = 0; + + test_operations( + &case.map(|key| { + i += 1; + Entry::insert(key.as_bytes(), i.to_string().as_bytes()) + }), + Some("4538b4de5e58f9be9d54541e69fab8c94c31553a1dec579227ef9b572d1c1dff"), + ) + } + + #[test] + fn upsert_deeper() { + // X has higher rank. + let case = ["X", "F", "F"]; + + let mut i = 0; + + test_operations( + &case.map(|key| { + i += 1; + Entry::insert(key.as_bytes(), i.to_string().as_bytes()) + }), + Some("c9f7aaefb18ec8569322b9621fc64f430a7389a790e0bf69ec0ad02879d6ce54"), + ) + } + + #[test] + fn upsert_root_with_children() { + // X has higher rank. + let case = ["F", "X", "X"]; + + let mut i = 0; + + test_operations( + &case.map(|key| { + i += 1; + Entry::insert(key.as_bytes(), i.to_string().as_bytes()) + }), + Some("02e26311f2b55bf6d4a7163399f99e17c975891a05af2f1e09bc969f8bf0f95d"), + ) + } } diff --git a/mast/src/operations/remove.rs b/mast/src/operations/remove.rs index d5e69df..b4020a4 100644 --- a/mast/src/operations/remove.rs +++ b/mast/src/operations/remove.rs @@ -4,124 +4,150 @@ use redb::Table; use super::search::binary_search_path; use crate::node::{hash, Branch, Node}; +/// Removes the target node if it exists, and returns the new root and the removed node. pub(crate) fn remove<'a>( nodes_table: &'_ mut Table<&'static [u8], (u64, &'static [u8])>, root: Option, key: &[u8], -) -> Option { +) -> (Option, Option) { let mut path = binary_search_path(nodes_table, root, key); - // The key doesn't exist, so there is nothing to remove. - let mut root = path.upper.first().map(|(n, _)| n.clone()); - - dbg!(&path); - if let Some(mut target) = path.target { - // Zipping + let mut root = None; - target.decrement_ref_count(); - target.save(nodes_table); + if let Some(mut target) = path.target.clone() { + root = zip(nodes_table, &mut target) + } else { + // clearly the lower path has the highest node, and it won't be changed. + root = path.lower.first().map(|(n, _)| n.clone()); + } - let mut left_subtree = Vec::new(); - let mut right_subtree = Vec::new(); + // If there is an upper path, we propagate the hash updates upwards. + while let Some((mut node, branch)) = path.upper.pop() { + node.decrement_ref_count().save(nodes_table); - target - .left() - .and_then(|h| Node::open(nodes_table, h)) - .map(|n| left_subtree.push(n)); + match branch { + Branch::Left => node.set_left_child(root.map(|n| n.hash())), + Branch::Right => node.set_right_child(root.map(|n| n.hash())), + }; - while let Some(next) = left_subtree - .last() - .and_then(|n| n.right().and_then(|h| Node::open(nodes_table, h))) - { - left_subtree.push(next); - } + node.increment_ref_count().save(nodes_table); - target - .right() - .and_then(|h| Node::open(nodes_table, h)) - .map(|n| right_subtree.push(n)); + root = Some(node); + } - while let Some(next) = right_subtree - .last() - .and_then(|n| n.left().and_then(|h| Node::open(nodes_table, h))) - { - right_subtree.push(next); - } + return (root, path.target); +} - let mut i = left_subtree.len().max(right_subtree.len()); - let mut last: Option = None; - - while i > 0 { - last = match (left_subtree.get_mut(i - 1), right_subtree.get_mut(i - 1)) { - (Some(left), None) => Some(left.clone()), // Left subtree is deeper - (None, Some(right)) => Some(right.clone()), // Right subtree is deeper - (Some(left), Some(right)) => { - let rank_left = hash(left.key()); - let rank_right = hash(right.key()); - - if hash(left.key()).as_bytes() > hash(right.key()).as_bytes() { - right - // decrement old version - .decrement_ref_count() - .save(nodes_table) - // save new version - .set_left_child(last.map(|n| n.hash())) - .increment_ref_count() - .save(nodes_table); - - left - // decrement old version - .decrement_ref_count() - .save(nodes_table) - // save new version - .set_right_child(Some(right.hash())) - .increment_ref_count() - .save(nodes_table); - - Some(left.clone()) - } else { - left - // decrement old version - .decrement_ref_count() - .save(nodes_table) - // save new version - .set_right_child(last.map(|n| n.hash())) - .increment_ref_count() - .save(nodes_table); - - right - // decrement old version - .decrement_ref_count() - .save(nodes_table) - // save new version - .set_left_child(Some(left.hash())) - .increment_ref_count() - .save(nodes_table); - - Some(right.clone()) - } - } - _ => { - // Should never happen! - None - } - }; - - i -= 1; - } +fn zip( + nodes_table: &'_ mut Table<&'static [u8], (u64, &'static [u8])>, + target: &mut Node, +) -> Option { + target.decrement_ref_count(); + target.save(nodes_table); + + let mut left_subtree = Vec::new(); + let mut right_subtree = Vec::new(); + + target + .left() + .and_then(|h| Node::open(nodes_table, h)) + .map(|n| left_subtree.push(n)); + target + .right() + .and_then(|h| Node::open(nodes_table, h)) + .map(|n| right_subtree.push(n)); + + while let Some(next) = left_subtree + .last() + .and_then(|n| n.right().and_then(|h| Node::open(nodes_table, h))) + { + left_subtree.push(next); + } - // dbg!(&last); - return last; - } else { - // clearly the lower path has the highest node, and it won't be changed. - return path.lower.first().map(|(n, _)| n.clone()); + while let Some(next) = right_subtree + .last() + .and_then(|n| n.left().and_then(|h| Node::open(nodes_table, h))) + { + right_subtree.push(next); } - if root.is_none() { - root = path.lower.first().map(|(n, _)| n.clone()); + let mut i = left_subtree.len().max(right_subtree.len()); + let mut previous: Option = None; + + while i > 0 { + previous = zip_up( + nodes_table, + previous, + left_subtree.get_mut(i - 1), + right_subtree.get_mut(i - 1), + ); + + i -= 1; } - return root; + previous +} + +fn zip_up( + nodes_table: &'_ mut Table<&'static [u8], (u64, &'static [u8])>, + previous: Option, + left: Option<&mut Node>, + right: Option<&mut Node>, +) -> Option { + match (left, right) { + (Some(left), None) => Some(left.clone()), // Left subtree is deeper + (None, Some(right)) => Some(right.clone()), // Right subtree is deeper + (Some(left), Some(right)) => { + let rank_left = hash(left.key()); + let rank_right = hash(right.key()); + + if hash(left.key()).as_bytes() > hash(right.key()).as_bytes() { + right + // decrement old version + .decrement_ref_count() + .save(nodes_table) + // save new version + .set_left_child(previous.map(|n| n.hash())) + .increment_ref_count() + .save(nodes_table); + + left + // decrement old version + .decrement_ref_count() + .save(nodes_table) + // save new version + .set_right_child(Some(right.hash())) + .increment_ref_count() + .save(nodes_table); + + Some(left.clone()) + } else { + left + // decrement old version + .decrement_ref_count() + .save(nodes_table) + // save new version + .set_right_child(previous.map(|n| n.hash())) + .increment_ref_count() + .save(nodes_table); + + right + // decrement old version + .decrement_ref_count() + .save(nodes_table) + // save new version + .set_left_child(Some(left.hash())) + .increment_ref_count() + .save(nodes_table); + + Some(right.clone()) + } + } + _ => { + // Should never happen! + None + } + } } #[cfg(test)] @@ -138,7 +164,7 @@ mod test { } proptest! { - // #[test] + #[test] fn insert_remove( random_entries in prop::collection::vec( (prop::collection::vec(any::(), 1), prop::collection::vec(any::(), 1), operation_strategy()), @@ -153,15 +179,12 @@ mod test { } } - // #[test] - fn empty() { + #[test] + fn becomes_empty() { let case = [("A", Operation::Insert), ("A", Operation::Remove)] .map(|(k, op)| (Entry::new(k.as_bytes(), k.as_bytes()), op)); - test_operations( - &case, - Some("78fd7507ef338f1a5816ffd702394999680a9694a85f4b8af77795d9fdd5854d"), - ) + test_operations(&case, None) } #[test] @@ -188,4 +211,22 @@ mod test { test_operations(&case, None) } + + #[test] + fn alphabet_after_remove() { + let mut case = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", + "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + ] + .map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())) + .to_vec(); + + case.push(Entry::insert(&[0], &[0])); + case.push(Entry::remove(&[0])); + + test_operations( + &case, + Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"), + ); + } } diff --git a/mast/src/test.rs b/mast/src/test.rs index 10b704f..a4caee5 100644 --- a/mast/src/test.rs +++ b/mast/src/test.rs @@ -73,7 +73,7 @@ pub fn test_operations(input: &[(Entry, Operation)], root_hash: Option<&str>) { } // Uncomment to see the graph - println!("{}", into_mermaid_graph(&treap)); + // println!("{}", into_mermaid_graph(&treap)); let collected = treap .iter() diff --git a/mast/src/treap.rs b/mast/src/treap.rs index a5f272b..3a33c88 100644 --- a/mast/src/treap.rs +++ b/mast/src/treap.rs @@ -79,9 +79,11 @@ impl<'treap> HashTreap<'treap> { write_txn.commit().unwrap(); } - pub fn remove(&mut self, key: &[u8]) -> Option<&[u8]> { + pub fn remove(&mut self, key: &[u8]) -> Option> { let write_txn = self.db.begin_write().unwrap(); + let mut removed_node; + { let mut roots_table = write_txn.open_table(ROOTS_TABLE).unwrap(); let mut nodes_table = write_txn.open_table(NODES_TABLE).unwrap(); @@ -90,7 +92,9 @@ impl<'treap> HashTreap<'treap> { .root_hash_inner(&roots_table) .and_then(|hash| Node::open(&nodes_table, hash)); - let new_root = crate::operations::remove(&mut nodes_table, old_root, key); + let (new_root, old_node) = crate::operations::remove(&mut nodes_table, old_root, key); + + removed_node = old_node; if let Some(new_root) = new_root { roots_table @@ -104,7 +108,7 @@ impl<'treap> HashTreap<'treap> { // Finally commit the changes to the storage. write_txn.commit().unwrap(); - None + removed_node.map(|node| node.value().to_vec().into_boxed_slice()) } pub fn iter(&self) -> TreapIterator<'_> {