diff --git a/lib/cretonne/src/bforest/map.rs b/lib/cretonne/src/bforest/map.rs new file mode 100644 index 0000000000..43855c4760 --- /dev/null +++ b/lib/cretonne/src/bforest/map.rs @@ -0,0 +1,753 @@ +//! Forest of maps. + +use packed_option::PackedOption; +use std::marker::PhantomData; +use super::{INNER_SIZE, BPlusComparator, Forest, NodePool, Node, NodeData, Path}; + +/// Tag type defining forest types for a map. +struct MapTypes(PhantomData<(K, V, C)>); + +impl Forest for MapTypes +where + K: Copy, + V: Copy, + C: BPlusComparator, +{ + type Key = K; + type Value = V; + type LeafKeys = [K; INNER_SIZE - 1]; + type LeafValues = [V; INNER_SIZE - 1]; + type Comparator = C; + + fn splat_key(key: Self::Key) -> Self::LeafKeys { + [key; INNER_SIZE - 1] + } + + fn splat_value(value: Self::Value) -> Self::LeafValues { + [value; INNER_SIZE - 1] + } +} + +/// Memory pool for a forest of `BPlusMap` instances. +pub struct MapForest +where + K: Copy, + V: Copy, + C: BPlusComparator, +{ + nodes: NodePool>, +} + +impl MapForest +where + K: Copy, + V: Copy, + C: BPlusComparator, +{ + /// Create a new empty forest. + pub fn new() -> MapForest { + MapForest { nodes: NodePool::new() } + } + + /// Clear all maps in the forest. + /// + /// All `BPlusMap` instances belong to this forest are invalidated and should no longer be used. + pub fn clear(&mut self) { + self.nodes.clear(); + } +} + +/// B-tree mapping from `K` to `V` using `C` for comparing keys. +/// +/// This is not a general-purpose replacement for `BTreeMap`. See the [module +/// documentation](index.html) for more information about design tradeoffs. +pub struct BPlusMap +where + K: Copy, + V: Copy, + C: BPlusComparator, +{ + root: PackedOption, + unused: PhantomData<(K, V, C)>, +} + +impl BPlusMap +where + K: Copy, + V: Copy, + C: BPlusComparator, +{ + /// Make an empty map. + pub fn new() -> BPlusMap { + BPlusMap { + root: None.into(), + unused: PhantomData, + } + } + + /// Is this an empty map? + pub fn is_empty(&self) -> bool { + self.root.is_none() + } + + /// Get the value stored for `key`. + pub fn get(&self, key: K, forest: &MapForest, comp: &C) -> Option { + self.root.expand().and_then(|root| { + Path::default().find(key, root, &forest.nodes, comp) + }) + } + + /// Insert `key, value` into the map and return the old value stored for `key`, if any. + pub fn insert( + &mut self, + key: K, + value: V, + forest: &mut MapForest, + comp: &C, + ) -> Option { + self.cursor(forest, comp).insert(key, value) + } + + /// Remove `key` from the map and return the removed value for `key`, if any. + pub fn remove(&mut self, key: K, forest: &mut MapForest, comp: &C) -> Option { + let mut c = self.cursor(forest, comp); + if c.goto(key).is_some() { + c.remove() + } else { + None + } + } + + /// Create a cursor for navigating this map. The cursor is initially positioned off the end of + /// the map. + pub fn cursor<'a>( + &'a mut self, + forest: &'a mut MapForest, + comp: &'a C, + ) -> MapCursor<'a, K, V, C> { + MapCursor::new(self, forest, comp) + } +} + +#[cfg(test)] +impl BPlusMap +where + K: Copy + ::std::fmt::Display, + V: Copy, + C: BPlusComparator, +{ + /// Verify consistency. + fn verify(&self, forest: &MapForest, comp: &C) + where + NodeData>: ::std::fmt::Display, + { + if let Some(root) = self.root.expand() { + forest.nodes.verify_tree(root, comp); + } + } + + /// Get a text version of the path to `key`. + fn tpath(&self, key: K, forest: &MapForest, comp: &C) -> String { + match self.root.expand() { + None => "map(empty)".to_string(), + Some(root) => { + let mut path = Path::default(); + path.find(key, root, &forest.nodes, comp); + path.to_string() + } + } + } +} + +/// A position in a `BPlusMap` used to navigate and modify the ordered map. +/// +/// A cursor always points at a key-value pair in the map, or "off the end" which is a position +/// after the last entry in the map. +pub struct MapCursor<'a, K, V, C> +where + K: 'a + Copy, + V: 'a + Copy, + C: 'a + BPlusComparator, +{ + root: &'a mut PackedOption, + pool: &'a mut NodePool>, + comp: &'a C, + path: Path>, +} + +impl<'a, K, V, C> MapCursor<'a, K, V, C> +where + K: Copy, + V: Copy, + C: BPlusComparator, +{ + /// Create a cursor with a default (off-the-end) location. + fn new( + container: &'a mut BPlusMap, + forest: &'a mut MapForest, + comp: &'a C, + ) -> MapCursor<'a, K, V, C> { + MapCursor { + root: &mut container.root, + pool: &mut forest.nodes, + comp, + path: Path::default(), + } + } + + /// Is this cursor pointing to an empty map? + pub fn is_empty(&self) -> bool { + self.root.is_none() + } + + /// Move cursor to the next key-value pair and return it. + /// + /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end + /// position. + pub fn next(&mut self) -> Option<(K, V)> { + self.path.next(self.pool) + } + + /// Move cursor to the previous key-value pair and return it. + /// + /// If the cursor is already pointing at the first entry, leave it there and return `None`. + pub fn prev(&mut self) -> Option<(K, V)> { + self.root.expand().and_then( + |root| self.path.prev(root, self.pool), + ) + } + + /// Get the current key, or `None` if the cursor is at the end. + pub fn key(&self) -> Option { + self.path.leaf_pos().and_then(|(node, entry)| { + self.pool[node].unwrap_leaf().0.get(entry).cloned() + }) + } + + /// Get the current value, or `None` if the cursor is at the end. + pub fn value(&self) -> Option { + self.path.leaf_pos().and_then(|(node, entry)| { + self.pool[node].unwrap_leaf().1.get(entry).cloned() + }) + } + + /// Move this cursor to `key`. + /// + /// If `key` is in the map, place the cursor at `key` and return the corresponding value. + /// + /// If `key` is not in the set, place the cursor at the next larger element (or the end) and + /// return `None`. + pub fn goto(&mut self, elem: K) -> Option { + self.root.expand().and_then(|root| { + let v = self.path.find(elem, root, self.pool, self.comp); + if v.is_none() { + self.path.normalize(self.pool); + } + v + }) + } + + /// Insert `(key, value))` into the map and leave the cursor at the inserted pair. + /// + /// If the map did not contain `key`, return `None`. + /// + /// If `key` is already present, replace the existing with `value` and return the old value. + pub fn insert(&mut self, key: K, value: V) -> Option { + match self.root.expand() { + None => { + let root = self.pool.alloc_node(NodeData::leaf(key, value)); + *self.root = root.into(); + self.path.set_root_node(root); + None + } + Some(root) => { + // TODO: Optimize the case where `self.path` is already at the correct insert pos. + let old = self.path.find(key, root, self.pool, self.comp); + if old.is_some() { + *self.path.value_mut(self.pool) = value; + } else { + *self.root = self.path.insert(key, value, self.pool).into(); + } + old + } + } + } + + /// Remove the current entry (if any) and return the mapped value. + /// This advances the cursor to the next entry after the removed one. + pub fn remove(&mut self) -> Option { + let value = self.value(); + if value.is_some() { + *self.root = self.path.remove(self.pool).into(); + } + value + } +} + +#[cfg(test)] +impl<'a, K, V, C> MapCursor<'a, K, V, C> +where + K: Copy + ::std::fmt::Display, + V: Copy + ::std::fmt::Display, + C: BPlusComparator, +{ + fn verify(&self) { + self.path.verify(self.pool); + self.root.map(|root| self.pool.verify_tree(root, self.comp)); + } + + /// Get a text version of the path to the current position. + fn tpath(&self) -> String { + self.path.to_string() + } +} + +#[cfg(test)] +mod test { + use std::mem; + use super::*; + use super::super::NodeData; + + #[test] + fn node_size() { + // check that nodes are cache line sized when keys and values are 32 bits. + type F = MapTypes; + assert_eq!(mem::size_of::>(), 64); + } + + #[test] + fn empty() { + let mut f = MapForest::::new(); + f.clear(); + + let mut m = BPlusMap::::new(); + assert!(m.is_empty()); + + assert_eq!(m.get(7, &f, &()), None); + + let mut c = m.cursor(&mut f, &()); + assert!(c.is_empty()); + assert_eq!(c.key(), None); + assert_eq!(c.value(), None); + assert_eq!(c.next(), None); + assert_eq!(c.prev(), None); + c.verify(); + assert_eq!(c.tpath(), ""); + } + + #[test] + fn inserting() { + let f = &mut MapForest::::new(); + let mut m = BPlusMap::::new(); + + // The first seven values stay in a single leaf node. + assert_eq!(m.insert(50, 5.0, f, &()), None); + assert_eq!(m.insert(50, 5.5, f, &()), Some(5.0)); + assert_eq!(m.insert(20, 2.0, f, &()), None); + assert_eq!(m.insert(80, 8.0, f, &()), None); + assert_eq!(m.insert(40, 4.0, f, &()), None); + assert_eq!(m.insert(60, 6.0, f, &()), None); + assert_eq!(m.insert(90, 9.0, f, &()), None); + assert_eq!(m.insert(200, 20.0, f, &()), None); + + m.verify(f, &()); + + // [ 20 40 50 60 80 90 200 ] + + assert_eq!(m.get(0, f, &()), None); + assert_eq!(m.get(20, f, &()), Some(2.0)); + assert_eq!(m.get(30, f, &()), None); + assert_eq!(m.get(40, f, &()), Some(4.0)); + assert_eq!(m.get(50, f, &()), Some(5.5)); + assert_eq!(m.get(60, f, &()), Some(6.0)); + assert_eq!(m.get(70, f, &()), None); + assert_eq!(m.get(80, f, &()), Some(8.0)); + assert_eq!(m.get(100, f, &()), None); + + { + let mut c = m.cursor(f, &()); + assert_eq!(c.prev(), Some((200, 20.0))); + assert_eq!(c.prev(), Some((90, 9.0))); + assert_eq!(c.prev(), Some((80, 8.0))); + assert_eq!(c.prev(), Some((60, 6.0))); + assert_eq!(c.prev(), Some((50, 5.5))); + assert_eq!(c.prev(), Some((40, 4.0))); + assert_eq!(c.prev(), Some((20, 2.0))); + assert_eq!(c.prev(), None); + } + + // Test some removals where the node stays healthy. + assert_eq!(m.tpath(50, f, &()), "node0[2]"); + assert_eq!(m.tpath(80, f, &()), "node0[4]"); + assert_eq!(m.tpath(200, f, &()), "node0[6]"); + + assert_eq!(m.remove(80, f, &()), Some(8.0)); + assert_eq!(m.tpath(50, f, &()), "node0[2]"); + assert_eq!(m.tpath(80, f, &()), "node0[4]"); + assert_eq!(m.tpath(200, f, &()), "node0[5]"); + assert_eq!(m.remove(80, f, &()), None); + m.verify(f, &()); + + assert_eq!(m.remove(20, f, &()), Some(2.0)); + assert_eq!(m.tpath(50, f, &()), "node0[1]"); + assert_eq!(m.tpath(80, f, &()), "node0[3]"); + assert_eq!(m.tpath(200, f, &()), "node0[4]"); + assert_eq!(m.remove(20, f, &()), None); + m.verify(f, &()); + + // [ 40 50 60 90 200 ] + + { + let mut c = m.cursor(f, &()); + assert_eq!(c.goto(0), None); + assert_eq!(c.key(), Some(40)); + assert_eq!(c.value(), Some(4.0)); + assert_eq!(c.next(), Some((50, 5.5))); + assert_eq!(c.next(), Some((60, 6.0))); + assert_eq!(c.next(), Some((90, 9.0))); + assert_eq!(c.next(), Some((200, 20.0))); + c.verify(); + assert_eq!(c.next(), None); + c.verify(); + } + + // Removals from the root leaf node beyond underflow. + assert_eq!(m.remove(200, f, &()), Some(20.0)); + assert_eq!(m.remove(40, f, &()), Some(4.0)); + assert_eq!(m.remove(60, f, &()), Some(6.0)); + m.verify(f, &()); + assert_eq!(m.remove(50, f, &()), Some(5.5)); + m.verify(f, &()); + assert_eq!(m.remove(90, f, &()), Some(9.0)); + m.verify(f, &()); + assert!(m.is_empty()); + } + + #[test] + fn split_level0_leaf() { + // Various ways of splitting a full leaf node at level 0. + let f = &mut MapForest::::new(); + + fn full_leaf(f: &mut MapForest) -> BPlusMap { + let mut m = BPlusMap::new(); + for n in 1..8 { + m.insert(n * 10, n as f32 * 1.1, f, &()); + } + m + } + + // Insert at front of leaf. + let mut m = full_leaf(f); + m.insert(5, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(5, f, &()), Some(4.2)); + + // Insert at back of leaf. + let mut m = full_leaf(f); + m.insert(80, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(80, f, &()), Some(4.2)); + + // Insert before middle (40). + let mut m = full_leaf(f); + m.insert(35, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(35, f, &()), Some(4.2)); + + // Insert after middle (40). + let mut m = full_leaf(f); + m.insert(45, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(45, f, &()), Some(4.2)); + } + + #[test] + fn split_level1_leaf() { + // Various ways of splitting a full leaf node at level 1. + let f = &mut MapForest::::new(); + + // Return a map whose root node is a full inner node, and the leaf nodes are all full + // containing: + // + // 110, 120, ..., 170 + // 210, 220, ..., 270 + // ... + // 810, 820, ..., 870 + fn full(f: &mut MapForest) -> BPlusMap { + let mut m = BPlusMap::new(); + + // Start by inserting elements in order. + // This should leave 8 leaf nodes with 4 elements in each. + for row in 1..9 { + for col in 1..5 { + m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &()); + } + } + + // Then top up the leaf nodes without splitting them. + for row in 1..9 { + for col in 5..8 { + m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &()); + } + } + + m + } + + let mut m = full(f); + // Verify geometry. Get get node2 as the root and leaves node0, 1, 3, ... + m.verify(f, &()); + assert_eq!(m.tpath(110, f, &()), "node2[0]--node0[0]"); + assert_eq!(m.tpath(140, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(210, f, &()), "node2[1]--node1[0]"); + assert_eq!(m.tpath(270, f, &()), "node2[1]--node1[6]"); + assert_eq!(m.tpath(310, f, &()), "node2[2]--node3[0]"); + assert_eq!(m.tpath(810, f, &()), "node2[7]--node8[0]"); + assert_eq!(m.tpath(870, f, &()), "node2[7]--node8[6]"); + + // Front of first leaf. + m.insert(0, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(0, f, &()), Some(4.2)); + + // First leaf split 4-4 after appending to LHS. + f.clear(); + m = full(f); + m.insert(135, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(135, f, &()), Some(4.2)); + + // First leaf split 4-4 after prepending to RHS. + f.clear(); + m = full(f); + m.insert(145, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(145, f, &()), Some(4.2)); + + // First leaf split 4-4 after appending to RHS. + f.clear(); + m = full(f); + m.insert(175, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(175, f, &()), Some(4.2)); + + // Left-middle leaf split, ins LHS. + f.clear(); + m = full(f); + m.insert(435, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(435, f, &()), Some(4.2)); + + // Left-middle leaf split, ins RHS. + f.clear(); + m = full(f); + m.insert(445, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(445, f, &()), Some(4.2)); + + // Right-middle leaf split, ins LHS. + f.clear(); + m = full(f); + m.insert(535, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(535, f, &()), Some(4.2)); + + // Right-middle leaf split, ins RHS. + f.clear(); + m = full(f); + m.insert(545, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(545, f, &()), Some(4.2)); + + // Last leaf split, ins LHS. + f.clear(); + m = full(f); + m.insert(835, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(835, f, &()), Some(4.2)); + + // Last leaf split, ins RHS. + f.clear(); + m = full(f); + m.insert(845, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(845, f, &()), Some(4.2)); + + // Front of last leaf. + f.clear(); + m = full(f); + m.insert(805, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(805, f, &()), Some(4.2)); + } + + // Make a tree with two barely healthy leaf nodes: + // [ 10 20 30 40 ] [ 50 60 70 80 ] + fn two_leaf(f: &mut MapForest) -> BPlusMap { + f.clear(); + let mut m = BPlusMap::new(); + for n in 1..9 { + m.insert(n * 10, n as f32, f, &()); + } + m + } + + #[test] + fn remove_level1() { + let f = &mut MapForest::::new(); + let mut m = two_leaf(f); + + // Verify geometry. + m.verify(f, &()); + assert_eq!(m.tpath(10, f, &()), "node2[0]--node0[0]"); + assert_eq!(m.tpath(40, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(50, f, &()), "node2[1]--node1[0]"); + assert_eq!(m.tpath(80, f, &()), "node2[1]--node1[3]"); + + // Remove the front entry from a node that stays healthy. + assert_eq!(m.insert(55, 5.5, f, &()), None); + assert_eq!(m.remove(50, f, &()), Some(5.0)); + m.verify(f, &()); + assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(55, f, &()), "node2[1]--node1[0]"); + + // Remove the front entry from the first leaf node: No critical key to update. + assert_eq!(m.insert(15, 1.5, f, &()), None); + assert_eq!(m.remove(10, f, &()), Some(1.0)); + m.verify(f, &()); + + // [ 15 20 30 40 ] [ 55 60 70 80 ] + + // Remove the front entry from a right-most node that underflows. + // No rebalancing for the right-most node. Still need critical key update. + assert_eq!(m.remove(55, f, &()), Some(5.5)); + m.verify(f, &()); + assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]"); + + // [ 15 20 30 40 ] [ 60 70 80 ] + + // Replenish the right leaf. + assert_eq!(m.insert(90, 9.0, f, &()), None); + assert_eq!(m.insert(100, 10.0, f, &()), None); + m.verify(f, &()); + assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]"); + + // [ 15 20 30 40 ] [ 60 70 80 90 100 ] + + // Removing one entry from the left leaf should trigger a rebalancing from the right + // sibling. + assert_eq!(m.remove(20, f, &()), Some(2.0)); + m.verify(f, &()); + + // [ 15 30 40 60 ] [ 70 80 90 100 ] + // Check that the critical key was updated correctly. + assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(60, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]"); + + // Remove front entry from the left-most leaf node, underflowing. + // This should cause two leaf nodes to be merged and the root node to go away. + assert_eq!(m.remove(15, f, &()), Some(1.5)); + m.verify(f, &()); + } + + #[test] + fn remove_level1_rightmost() { + let f = &mut MapForest::::new(); + let mut m = two_leaf(f); + + // [ 10 20 30 40 ] [ 50 60 70 80 ] + + // Remove entries from the right leaf. This doesn't trigger a rebalancing. + assert_eq!(m.remove(60, f, &()), Some(6.0)); + assert_eq!(m.remove(80, f, &()), Some(8.0)); + assert_eq!(m.remove(50, f, &()), Some(5.0)); + m.verify(f, &()); + + // [ 10 20 30 40 ] [ 70 ] + assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]"); + + // Removing the last entry from the right leaf should cause a collapse. + assert_eq!(m.remove(70, f, &()), Some(7.0)); + m.verify(f, &()); + } + + // Make a 3-level tree with barely healthy nodes. + // 1 root, 8 inner nodes, 7*4+5=33 leaf nodes, 4 entries each. + fn level3_sparse(f: &mut MapForest) -> BPlusMap { + f.clear(); + let mut m = BPlusMap::new(); + for n in 1..133 { + m.insert(n * 10, n as f32, f, &()); + } + m + } + + #[test] + fn level3_removes() { + let f = &mut MapForest::::new(); + let mut m = level3_sparse(f); + m.verify(f, &()); + + // Check geometry. + // Root: node11 + // [ node2 170 node10 330 node16 490 node21 650 node26 810 node31 970 node36 1130 node41 ] + // L1: node11 + assert_eq!(m.tpath(0, f, &()), "node11[0]--node2[0]--node0[0]"); + assert_eq!(m.tpath(10000, f, &()), "node11[7]--node41[4]--node40[4]"); + + // 650 is a critical key in the middle of the root. + assert_eq!(m.tpath(640, f, &()), "node11[3]--node21[3]--node19[3]"); + assert_eq!(m.tpath(650, f, &()), "node11[4]--node26[0]--node20[0]"); + + // Deleting 640 triggers a rebalance from node19 to node 20, cascading to n21 -> n26. + assert_eq!(m.remove(640, f, &()), Some(64.0)); + m.verify(f, &()); + assert_eq!(m.tpath(650, f, &()), "node11[3]--node26[3]--node20[3]"); + + // 1130 is in the first leaf of the last L1 node. Deleting it triggers a rebalance node35 + // -> node37, but no rebalance above where there is no right sibling. + assert_eq!(m.tpath(1130, f, &()), "node11[6]--node41[0]--node35[0]"); + assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node35[1]"); + assert_eq!(m.remove(1130, f, &()), Some(113.0)); + m.verify(f, &()); + assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node37[0]"); + } + + #[test] + fn insert_many() { + let f = &mut MapForest::::new(); + let mut m = BPlusMap::::new(); + + let mm = 4096; + let mut x = 0; + + for n in 0..mm { + assert_eq!(m.insert(x, n as f32, f, &()), None); + m.verify(f, &()); + + x = (x + n + 1) % mm; + } + + x = 0; + for n in 0..mm { + assert_eq!(m.get(x, f, &()), Some(n as f32)); + x = (x + n + 1) % mm; + } + + x = 0; + for n in 0..mm { + assert_eq!(m.remove(x, f, &()), Some(n as f32)); + m.verify(f, &()); + + x = (x + n + 1) % mm; + } + + assert!(m.is_empty()); + } +} diff --git a/lib/cretonne/src/bforest/mod.rs b/lib/cretonne/src/bforest/mod.rs new file mode 100644 index 0000000000..de8ee1092d --- /dev/null +++ b/lib/cretonne/src/bforest/mod.rs @@ -0,0 +1,172 @@ +//! A forest of B+-trees. +//! +//! This module provides a data structures representing a set of small ordered sets or maps. +//! It is implemented as a forest of B+-trees all allocating nodes out of the same pool. +//! +//! **These are not general purpose data structures that are somehow magically faster that the +//! standard library's `BTreeSet` and `BTreeMap` types.** +//! +//! The tradeoffs are different: +//! +//! - Keys and values are expected to be small and copyable. We optimize for 32-bit types. +//! - A comparator object is used to compare keys, allowing smaller "context free" keys. +//! - Empty trees have a very small 32-bit footprint. +//! - All the trees in a forest can be cleared in constant time. + +use std::borrow::BorrowMut; +use std::cmp::Ordering; + +mod map; +mod node; +mod path; +mod pool; +mod set; + +pub use self::map::{MapForest, BPlusMap, MapCursor}; +pub use self::set::{SetForest, BPlusSet, SetCursor}; + +use self::node::NodeData; +use self::path::Path; +use self::pool::NodePool; + +/// The maximum branching factor of an inner node in a B+-tree. +/// The minimum number of outgoing edges is `INNER_SIZE/2`. +const INNER_SIZE: usize = 8; + +/// Given the worst case branching factor of `INNER_SIZE/2` = 4, this is the +/// worst case path length from the root node to a leaf node in a tree with 2^32 +/// entries. We would run out of node references before we hit `MAX_PATH`. +const MAX_PATH: usize = 16; + +/// Key comparator. +/// +/// Keys don't need to implement `Ord`. They are compared using a comparator object which +/// provides a context for comparison. +pub trait BPlusComparator +where + K: Copy, +{ + /// Compare keys `a` and `b`. + /// + /// This relation must provide a total ordering or the key space. + fn cmp(&self, a: K, b: K) -> Ordering; + + /// Binary search for `k` in an ordered slice. + /// + /// Assume that `s` is already sorted according to this ordering, search for the key `k`. + /// + /// Returns `Ok(idx)` if `k` was found in the slice or `Err(idx)` with the position where it + /// should be inserted to preserve the ordering. + fn search(&self, k: K, s: &[K]) -> Result { + s.binary_search_by(|x| self.cmp(*x, k)) + } +} + +/// Trivial comparator that doesn't actually provide any context. +impl BPlusComparator for () +where + K: Copy + Ord, +{ + fn cmp(&self, a: K, b: K) -> Ordering { + a.cmp(&b) + } +} + +/// Family of types shared by the map and set forest implementations. +trait Forest { + /// The key type is present for both sets and maps. + type Key: Copy; + + /// The value type is `()` for sets. + type Value: Copy; + + /// An array of keys for the leaf nodes. + type LeafKeys: Copy + BorrowMut<[Self::Key]>; + + /// An array of values for the leaf nodes. + type LeafValues: Copy + BorrowMut<[Self::Value]>; + + /// Type used for key comparisons. + type Comparator: BPlusComparator; + + /// Splat a single key into a whole array. + fn splat_key(key: Self::Key) -> Self::LeafKeys; + + /// Splat a single value inst a whole array + fn splat_value(value: Self::Value) -> Self::LeafValues; +} + +/// A reference to a B+-tree node. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct Node(u32); +entity_impl!(Node, "node"); + +/// Empty type to be used as the "value" in B-trees representing sets. +#[derive(Clone, Copy)] +struct SetValue(); + +/// Insert `x` into `s` at position `i`, pushing out the last element. +fn slice_insert(s: &mut [T], i: usize, x: T) { + for j in (i + 1..s.len()).rev() { + s[j] = s[j - 1]; + } + s[i] = x; +} + +/// Shift elements in `s` to the left by `n` positions. +fn slice_shift(s: &mut [T], n: usize) { + for j in 0..s.len() - n { + s[j] = s[j + n]; + } +} + +#[cfg(test)] +mod test { + use super::*; + use entity::EntityRef; + use ir::Ebb; + + #[test] + fn comparator() { + let ebb1 = Ebb::new(1); + let ebb2 = Ebb::new(2); + let ebb3 = Ebb::new(3); + let ebb4 = Ebb::new(4); + let vals = [ebb1, ebb2, ebb4]; + let comp = (); + assert_eq!(comp.search(ebb1, &vals), Ok(0)); + assert_eq!(comp.search(ebb3, &vals), Err(2)); + assert_eq!(comp.search(ebb4, &vals), Ok(2)); + } + + #[test] + fn slice_insertion() { + let mut a = ['a', 'b', 'c', 'd']; + + slice_insert(&mut a[0..1], 0, 'e'); + assert_eq!(a, ['e', 'b', 'c', 'd']); + + slice_insert(&mut a, 0, 'a'); + assert_eq!(a, ['a', 'e', 'b', 'c']); + + slice_insert(&mut a, 3, 'g'); + assert_eq!(a, ['a', 'e', 'b', 'g']); + + slice_insert(&mut a, 1, 'h'); + assert_eq!(a, ['a', 'h', 'e', 'b']); + } + + #[test] + fn slice_shifting() { + let mut a = ['a', 'b', 'c', 'd']; + + slice_shift(&mut a[0..1], 1); + assert_eq!(a, ['a', 'b', 'c', 'd']); + + slice_shift(&mut a[1..], 1); + assert_eq!(a, ['a', 'c', 'd', 'd']); + + slice_shift(&mut a, 2); + assert_eq!(a, ['d', 'd', 'd', 'd']); + } +} diff --git a/lib/cretonne/src/bforest/node.rs b/lib/cretonne/src/bforest/node.rs new file mode 100644 index 0000000000..53a96f9b14 --- /dev/null +++ b/lib/cretonne/src/bforest/node.rs @@ -0,0 +1,813 @@ +//! B+-tree nodes. + +use std::borrow::{Borrow, BorrowMut}; +use std::fmt; +use super::{Forest, Node, INNER_SIZE, SetValue, slice_insert, slice_shift}; + +/// B+-tree node. +/// +/// A B+-tree has different node types for inner nodes and leaf nodes. Inner nodes contain M node +/// references and M-1 keys while leaf nodes contain N keys and values. Values for M and N are +/// chosen such that a node is exactly 64 bytes (a cache line) when keys and values are 32 bits +/// each. +/// +/// An inner node contains at least M/2 node references unless it is the right-most node at its +/// level. A leaf node contains at least N/2 keys unless it is the right-most leaf. +pub(super) enum NodeData { + Inner { + /// The number of keys in this node. + /// The number of node references is always one more. + size: u8, + + /// Keys discriminating sub-trees. + /// + /// The key in `keys[i]` is greater than all keys in `tree[i]` and less than or equal to + /// all keys in `tree[i+1]`. + keys: [F::Key; INNER_SIZE - 1], + + /// Sub-trees. + tree: [Node; INNER_SIZE], + }, + Leaf { + /// Number of key-value pairs in this node. + size: u8, + + // Key array. + keys: F::LeafKeys, + + // Value array. + vals: F::LeafValues, + }, + /// An unused node on the free list. + Free { next: Option }, +} + +// Implement `Clone` and `Copy` manually, because deriving them would also require `Forest` to +// implement `Clone`. +impl Copy for NodeData {} +impl Clone for NodeData { + fn clone(&self) -> Self { + *self + } +} + +impl NodeData { + /// Is this a free/unused node? + pub fn is_free(&self) -> bool { + match self { + &NodeData::Free { .. } => true, + _ => false, + } + } + + /// Get the number of entries in this node. + /// + /// This is the number of outgoing edges in an inner node, or the number of key-value pairs in + /// a leaf node. + pub fn entries(&self) -> usize { + match self { + &NodeData::Inner { size, .. } => usize::from(size) + 1, + &NodeData::Leaf { size, .. } => usize::from(size), + &NodeData::Free { .. } => panic!("freed node"), + } + } + + /// Create an inner node with a single key and two sub-trees. + pub fn inner(left: Node, key: F::Key, right: Node) -> NodeData { + // Splat the key and right node to the whole array. + // Saves us from inventing a default/reserved value. + let mut tree = [right; INNER_SIZE]; + tree[0] = left; + NodeData::Inner { + size: 1, + keys: [key; INNER_SIZE - 1], + tree, + } + } + + /// Create a leaf node with a single key-value pair. + pub fn leaf(key: F::Key, value: F::Value) -> NodeData { + NodeData::Leaf { + size: 1, + keys: F::splat_key(key), + vals: F::splat_value(value), + } + } + + /// Unwrap an inner node into two slices (keys, trees). + pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) { + match self { + &NodeData::Inner { + size, + ref keys, + ref tree, + } => { + let size = usize::from(size); + // TODO: We could probably use `get_unchecked()` here since `size` is always in + // range. + (&keys[0..size], &tree[0..size + 1]) + } + _ => panic!("Expected inner node"), + } + } + + /// Unwrap a leaf node into two slices (keys, values) of the same length. + pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) { + match self { + &NodeData::Leaf { + size, + ref keys, + ref vals, + } => { + let size = usize::from(size); + let keys = keys.borrow(); + let vals = vals.borrow(); + // TODO: We could probably use `get_unchecked()` here since `size` is always in + // range. + (&keys[0..size], &vals[0..size]) + } + _ => panic!("Expected leaf node"), + } + } + + /// Unwrap a mutable leaf node into two slices (keys, values) of the same length. + pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) { + match self { + &mut NodeData::Leaf { + size, + ref mut keys, + ref mut vals, + } => { + let size = usize::from(size); + let keys = keys.borrow_mut(); + let vals = vals.borrow_mut(); + // TODO: We could probably use `get_unchecked_mut()` here since `size` is always in + // range. + (&mut keys[0..size], &mut vals[0..size]) + } + _ => panic!("Expected leaf node"), + } + } + + /// Get the critical key for a leaf node. + /// This is simply the first key. + pub fn leaf_crit_key(&self) -> F::Key { + match self { + &NodeData::Leaf { size, ref keys, .. } => { + debug_assert!(size > 0, "Empty leaf node"); + keys.borrow()[0] + } + _ => panic!("Expected leaf node"), + } + } + + /// Try to insert `(key, node)` at key-position `index` in an inner node. + /// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`. + /// If the node is full, this leaves the node unchanged and returns false. + pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool { + match self { + &mut NodeData::Inner { + ref mut size, + ref mut keys, + ref mut tree, + } => { + let sz = usize::from(*size); + debug_assert!(sz <= keys.len()); + debug_assert!(index <= sz, "Can't insert at {} with {} keys", index, sz); + + if let Some(ks) = keys.get_mut(0..sz + 1) { + *size = (sz + 1) as u8; + slice_insert(ks, index, key); + slice_insert(&mut tree[1..sz + 2], index, node); + true + } else { + false + } + } + _ => panic!("Expected inner node"), + } + } + + /// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node + /// is full. + pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool { + match self { + &mut NodeData::Leaf { + ref mut size, + ref mut keys, + ref mut vals, + } => { + let sz = usize::from(*size); + let keys = keys.borrow_mut(); + let vals = vals.borrow_mut(); + debug_assert!(sz <= keys.len()); + debug_assert!(index <= sz); + + if let Some(ks) = keys.get_mut(0..sz + 1) { + *size = (sz + 1) as u8; + slice_insert(ks, index, key); + slice_insert(&mut vals[0..sz + 1], index, value); + true + } else { + false + } + } + _ => panic!("Expected leaf node"), + } + } + + /// Split off the second half of this node. + /// It is assumed that this a completely full inner or leaf node. + /// + /// The `insert_index` parameter is the position where an insertion was tried and failed. The + /// node will be split in half with a bias towards an even split after the insertion is retried. + pub fn split(&mut self, insert_index: usize) -> SplitOff { + match self { + &mut NodeData::Inner { + ref mut size, + ref keys, + ref tree, + } => { + debug_assert_eq!(usize::from(*size), keys.len(), "Node not full"); + + // Number of tree entries in the lhs node. + let l_ents = split_pos(tree.len(), insert_index + 1); + let r_ents = tree.len() - l_ents; + + // With INNER_SIZE=8, we get l_ents=4 and: + // + // self: [ n0 k0 n1 k1 n2 k2 n3 k3 n4 k4 n5 k5 n6 k6 n7 ] + // lhs: [ n0 k0 n1 k1 n2 k2 n3 ] + // crit_key = k3 (not present in either node) + // rhs: [ n4 k4 n5 k5 n6 k6 n7 ] + + // 1. Truncate the LHS. + *size = (l_ents - 1) as u8; + + // 2. Copy second half to `rhs_data`. + let mut r_keys = *keys; + r_keys[0..r_ents - 1].copy_from_slice(&keys[l_ents..]); + + let mut r_tree = *tree; + r_tree[0..r_ents].copy_from_slice(&tree[l_ents..]); + + SplitOff { + lhs_entries: l_ents, + rhs_entries: r_ents, + crit_key: keys[l_ents - 1], + rhs_data: NodeData::Inner { + size: (r_ents - 1) as u8, + keys: r_keys, + tree: r_tree, + }, + } + } + &mut NodeData::Leaf { + ref mut size, + ref keys, + ref vals, + } => { + let o_keys = keys.borrow(); + let o_vals = vals.borrow(); + debug_assert_eq!(usize::from(*size), o_keys.len(), "Node not full"); + + let l_size = split_pos(o_keys.len(), insert_index); + let r_size = o_keys.len() - l_size; + + // 1. Truncate the LHS node at `l_size`. + *size = l_size as u8; + + // 2. Copy second half to `rhs_data`. + let mut r_keys = *keys; + r_keys.borrow_mut()[0..r_size].copy_from_slice(&o_keys[l_size..]); + + let mut r_vals = *vals; + r_vals.borrow_mut()[0..r_size].copy_from_slice(&o_vals[l_size..]); + + SplitOff { + lhs_entries: l_size, + rhs_entries: r_size, + crit_key: o_keys[l_size], + rhs_data: NodeData::Leaf { + size: r_size as u8, + keys: r_keys, + vals: r_vals, + }, + } + } + _ => panic!("Expected leaf node"), + } + } + + /// Remove the sub-tree at `index` from this inner node. + /// + /// Note that `index` refers to a sub-tree entry and not a key entry as it does for + /// `try_inner_insert()`. It is possible to remove the first sub-tree (which can't be inserted + /// by `try_inner_insert()`). + /// + /// Return an indication of the node's health (i.e. below half capacity). + pub fn inner_remove(&mut self, index: usize) -> Removed { + match self { + &mut NodeData::Inner { + ref mut size, + ref mut keys, + ref mut tree, + } => { + let ents = usize::from(*size) + 1; + debug_assert!(ents <= tree.len()); + debug_assert!(index < ents); + // Leave an invalid 0xff size when node becomes empty. + *size = ents.wrapping_sub(2) as u8; + if ents > 1 { + slice_shift(&mut keys[index.saturating_sub(1)..ents - 1], 1); + } + slice_shift(&mut tree[index..ents], 1); + Removed::new(index, ents - 1, tree.len()) + } + _ => panic!("Expected inner node"), + } + } + + /// Remove the key-value pair at `index` from this leaf node. + /// + /// Return an indication of the node's health (i.e. below half capacity). + pub fn leaf_remove(&mut self, index: usize) -> Removed { + match self { + &mut NodeData::Leaf { + ref mut size, + ref mut keys, + ref mut vals, + } => { + let sz = usize::from(*size); + let keys = keys.borrow_mut(); + let vals = vals.borrow_mut(); + *size -= 1; + slice_shift(&mut keys[index..sz], 1); + slice_shift(&mut vals[index..sz], 1); + Removed::new(index, sz - 1, keys.len()) + } + _ => panic!("Expected leaf node"), + } + } + + /// Balance this node with its right sibling. + /// + /// It is assumed that the current node has underflowed. Look at the right sibling node and do + /// one of two things: + /// + /// 1. Move all entries to the right node, leaving this node empty, or + /// 2. Distribute entries evenly between the two nodes. + /// + /// In the first case, `None` is returned. In the second case, the new critical key for the + /// right sibling node is returned. + pub fn balance(&mut self, crit_key: F::Key, rhs: &mut NodeData) -> Option { + match (self, rhs) { + (&mut NodeData::Inner { + size: ref mut l_size, + keys: ref mut l_keys, + tree: ref mut l_tree, + }, + &mut NodeData::Inner { + size: ref mut r_size, + keys: ref mut r_keys, + tree: ref mut r_tree, + }) => { + let l_ents = usize::from(*l_size) + 1; + let r_ents = usize::from(*r_size) + 1; + let ents = l_ents + r_ents; + + if ents <= r_tree.len() { + // All entries will fit in the RHS node. + // We'll leave the LHS node empty, but first use it as a scratch space. + *l_size = 0; + // Insert `crit_key` between the two nodes. + l_keys[l_ents - 1] = crit_key; + l_keys[l_ents..ents - 1].copy_from_slice(&r_keys[0..r_ents - 1]); + r_keys[0..ents - 1].copy_from_slice(&l_keys[0..ents - 1]); + l_tree[l_ents..ents].copy_from_slice(&r_tree[0..r_ents]); + r_tree[0..ents].copy_from_slice(&l_tree[0..ents]); + *r_size = (ents - 1) as u8; + None + } else { + // The entries don't all fit in one node. Distribute some from RHS -> LHS. + // Split evenly with a bias to putting one entry in LHS. + let r_goal = ents / 2; + let l_goal = ents - r_goal; + debug_assert!(l_goal > l_ents, "Node must be underflowed"); + + l_keys[l_ents - 1] = crit_key; + l_keys[l_ents..l_goal - 1].copy_from_slice(&r_keys[0..l_goal - 1 - l_ents]); + l_tree[l_ents..l_goal].copy_from_slice(&r_tree[0..l_goal - l_ents]); + *l_size = (l_goal - 1) as u8; + + let new_crit = r_keys[r_ents - r_goal - 1]; + slice_shift(&mut r_keys[0..r_ents - 1], r_ents - r_goal); + slice_shift(&mut r_tree[0..r_ents], r_ents - r_goal); + *r_size = (r_goal - 1) as u8; + + Some(new_crit) + } + } + (&mut NodeData::Leaf { + size: ref mut l_size, + keys: ref mut l_keys, + vals: ref mut l_vals, + }, + &mut NodeData::Leaf { + size: ref mut r_size, + keys: ref mut r_keys, + vals: ref mut r_vals, + }) => { + let l_ents = usize::from(*l_size); + let l_keys = l_keys.borrow_mut(); + let l_vals = l_vals.borrow_mut(); + let r_ents = usize::from(*r_size); + let r_keys = r_keys.borrow_mut(); + let r_vals = r_vals.borrow_mut(); + let ents = l_ents + r_ents; + + if ents <= r_vals.len() { + // We can fit all entries in the RHS node. + // We'll leave the LHS node empty, but first use it as a scratch space. + *l_size = 0; + l_keys[l_ents..ents].copy_from_slice(&r_keys[0..r_ents]); + r_keys[0..ents].copy_from_slice(&l_keys[0..ents]); + l_vals[l_ents..ents].copy_from_slice(&r_vals[0..r_ents]); + r_vals[0..ents].copy_from_slice(&l_vals[0..ents]); + *r_size = ents as u8; + None + } else { + // The entries don't all fit in one node. Distribute some from RHS -> LHS. + // Split evenly with a bias to putting one entry in LHS. + let r_goal = ents / 2; + let l_goal = ents - r_goal; + debug_assert!(l_goal > l_ents, "Node must be underflowed"); + + l_keys[l_ents..l_goal].copy_from_slice(&r_keys[0..l_goal - l_ents]); + l_vals[l_ents..l_goal].copy_from_slice(&r_vals[0..l_goal - l_ents]); + *l_size = l_goal as u8; + + slice_shift(&mut r_keys[0..r_ents], r_ents - r_goal); + slice_shift(&mut r_vals[0..r_ents], r_ents - r_goal); + *r_size = r_goal as u8; + + Some(r_keys[0]) + } + } + _ => panic!("Mismatched nodes"), + } + } +} + +/// Find the right split position for halving a full node with `len` entries to recover from a +/// failed insertion at `ins`. +/// +/// If `len` is even, we should split straight down the middle regardless of `len`. +/// +/// If `len` is odd, we should split the node such that the two halves are the same size after the +/// insertion is retried. +fn split_pos(len: usize, ins: usize) -> usize { + // Anticipate `len` being a compile time constant, so this all folds away when `len` is even. + if ins <= len / 2 { + len / 2 + } else { + (len + 1) / 2 + } +} + +/// The result of splitting off the second half of a node. +pub(super) struct SplitOff { + /// The number of entries left in the original node which becomes the left-hand-side of the + /// pair. This is the number of outgoing node edges for an inner node, and the number of + /// key-value pairs for a leaf node. + pub lhs_entries: usize, + + /// The number of entries in the new RHS node. + pub rhs_entries: usize, + + /// The critical key separating the LHS and RHS nodes. All keys in the LHS sub-tree are less + /// than the critical key, and all entries in the RHS sub-tree are greater or equal to the + /// critical key. + pub crit_key: F::Key, + + /// The RHS node data containing the elements that were removed from the original node (now the + /// LHS). + pub rhs_data: NodeData, +} + +/// The result of removing an entry from a node. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum Removed { + /// An entry was removed, and the node is still in good shape. + Healthy, + + /// The node is in good shape after removing the rightmost element. + Rightmost, + + /// The node has too few entries now, and it should be balanced with a sibling node. + Underflow, + + /// The last entry was removed. For an inner node, this means that the `keys` array is empty + /// and there is just a single sub-tree left. + Empty, +} + +impl Removed { + /// Create a `Removed` status from a size and capacity. + fn new(removed: usize, new_size: usize, capacity: usize) -> Removed { + if 2 * new_size >= capacity { + if removed == new_size { + Removed::Rightmost + } else { + Removed::Healthy + } + } else if new_size > 0 { + Removed::Underflow + } else { + Removed::Empty + } + } +} + +// Display ": value" or nothing at all for `()`. +pub(super) trait ValDisp { + fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result; +} + +impl ValDisp for SetValue { + fn valfmt(&self, _: &mut fmt::Formatter) -> fmt::Result { + Ok(()) + } +} + +impl ValDisp for T { + fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, ":{}", self) + } +} + +impl fmt::Display for NodeData +where + F: Forest, + F::Key: fmt::Display, + F::Value: ValDisp, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &NodeData::Inner { size, keys, tree } => { + write!(f, "[ {}", tree[0])?; + for i in 0..usize::from(size) { + write!(f, " {} {}", keys[i], tree[i + 1])?; + } + write!(f, " ]") + } + &NodeData::Leaf { size, keys, vals } => { + let keys = keys.borrow(); + let vals = vals.borrow(); + write!(f, "[")?; + for i in 0..usize::from(size) { + write!(f, " {}", keys[i])?; + vals[i].valfmt(f)?; + } + write!(f, " ]") + } + &NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n), + &NodeData::Free { next: None } => write!(f, "[ free ]"), + } + } +} + +#[cfg(test)] +mod test { + use std::mem; + use super::*; + + // Forest impl for a set implementation. + struct TF(); + + impl Forest for TF { + type Key = char; + type Value = SetValue; + type LeafKeys = [char; 15]; + type LeafValues = [SetValue; 15]; + type Comparator = (); + + fn splat_key(key: Self::Key) -> Self::LeafKeys { + [key; 15] + } + + fn splat_value(value: Self::Value) -> Self::LeafValues { + [value; 15] + } + } + + #[test] + fn inner() { + let n1 = Node(1); + let n2 = Node(2); + let n3 = Node(3); + let n4 = Node(4); + let mut inner = NodeData::::inner(n1, 'c', n4); + assert_eq!(mem::size_of_val(&inner), 64); + assert_eq!(inner.to_string(), "[ node1 c node4 ]"); + + assert!(inner.try_inner_insert(0, 'a', n2)); + assert_eq!(inner.to_string(), "[ node1 a node2 c node4 ]"); + + assert!(inner.try_inner_insert(1, 'b', n3)); + assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]"); + + for i in 3..7 { + assert!(inner.try_inner_insert( + usize::from(i), + ('a' as u8 + i) as char, + Node(i as u32 + 2), + )); + } + assert_eq!( + inner.to_string(), + "[ node1 a node2 b node3 c node4 d node5 e node6 f node7 g node8 ]" + ); + + // Now the node is full and insertion should fail anywhere. + assert!(!inner.try_inner_insert(0, 'x', n3)); + assert!(!inner.try_inner_insert(4, 'x', n3)); + assert!(!inner.try_inner_insert(7, 'x', n3)); + + // Splitting should be independent of the hint because we have an even number of node + // references. + let saved = inner.clone(); + let sp = inner.split(1); + assert_eq!(sp.lhs_entries, 4); + assert_eq!(sp.rhs_entries, 4); + assert_eq!(sp.crit_key, 'd'); + // The critical key is not present in either of the resulting nodes. + assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]"); + assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]"); + + assert_eq!(inner.inner_remove(0), Removed::Underflow); + assert_eq!(inner.to_string(), "[ node2 b node3 c node4 ]"); + + assert_eq!(inner.inner_remove(1), Removed::Underflow); + assert_eq!(inner.to_string(), "[ node2 c node4 ]"); + + assert_eq!(inner.inner_remove(1), Removed::Underflow); + assert_eq!(inner.to_string(), "[ node2 ]"); + + assert_eq!(inner.inner_remove(0), Removed::Empty); + + inner = saved; + let sp = inner.split(6); + assert_eq!(sp.lhs_entries, 4); + assert_eq!(sp.rhs_entries, 4); + assert_eq!(sp.crit_key, 'd'); + assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]"); + assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]"); + } + + #[test] + fn leaf() { + let mut leaf = NodeData::::leaf('d', SetValue()); + assert_eq!(leaf.to_string(), "[ d ]"); + + assert!(leaf.try_leaf_insert(0, 'a', SetValue())); + assert_eq!(leaf.to_string(), "[ a d ]"); + assert!(leaf.try_leaf_insert(1, 'b', SetValue())); + assert!(leaf.try_leaf_insert(2, 'c', SetValue())); + assert_eq!(leaf.to_string(), "[ a b c d ]"); + for i in 4..15 { + assert!(leaf.try_leaf_insert( + usize::from(i), + ('a' as u8 + i) as char, + SetValue(), + )); + } + assert_eq!(leaf.to_string(), "[ a b c d e f g h i j k l m n o ]"); + + // Now the node is full and insertion should fail anywhere. + assert!(!leaf.try_leaf_insert(0, 'x', SetValue())); + assert!(!leaf.try_leaf_insert(8, 'x', SetValue())); + assert!(!leaf.try_leaf_insert(15, 'x', SetValue())); + + // The index given to `split` is not the split position, it's a hint for balancing the node. + let saved = leaf.clone(); + let sp = leaf.split(12); + assert_eq!(sp.lhs_entries, 8); + assert_eq!(sp.rhs_entries, 7); + assert_eq!(sp.crit_key, 'i'); + assert_eq!(leaf.to_string(), "[ a b c d e f g h ]"); + assert_eq!(sp.rhs_data.to_string(), "[ i j k l m n o ]"); + + assert!(leaf.try_leaf_insert(8, 'i', SetValue())); + assert_eq!(leaf.leaf_remove(2), Removed::Healthy); + assert_eq!(leaf.to_string(), "[ a b d e f g h i ]"); + assert_eq!(leaf.leaf_remove(7), Removed::Underflow); + assert_eq!(leaf.to_string(), "[ a b d e f g h ]"); + + leaf = saved; + let sp = leaf.split(7); + assert_eq!(sp.lhs_entries, 7); + assert_eq!(sp.rhs_entries, 8); + assert_eq!(sp.crit_key, 'h'); + assert_eq!(leaf.to_string(), "[ a b c d e f g ]"); + assert_eq!(sp.rhs_data.to_string(), "[ h i j k l m n o ]"); + } + + #[test] + fn optimal_split_pos() { + // An even split is easy. + assert_eq!(split_pos(8, 0), 4); + assert_eq!(split_pos(8, 8), 4); + + // Easy cases for odd splits. + assert_eq!(split_pos(7, 0), 3); + assert_eq!(split_pos(7, 7), 4); + + // If the insertion point is the same as the split position, we + // will append to the lhs node. + assert_eq!(split_pos(7, 3), 3); + assert_eq!(split_pos(7, 4), 4); + } + + #[test] + fn inner_balance() { + let n1 = Node(1); + let n2 = Node(2); + let n3 = Node(3); + let mut lhs = NodeData::::inner(n1, 'a', n2); + assert!(lhs.try_inner_insert(1, 'b', n3)); + assert_eq!(lhs.to_string(), "[ node1 a node2 b node3 ]"); + + let n11 = Node(11); + let n12 = Node(12); + let mut rhs = NodeData::::inner(n11, 'p', n12); + + for i in 1..4 { + assert!(rhs.try_inner_insert( + usize::from(i), + ('p' as u8 + i) as char, + Node(i as u32 + 12), + )); + } + assert_eq!( + rhs.to_string(), + "[ node11 p node12 q node13 r node14 s node15 ]" + ); + + // 3+5 elements fit in RHS. + assert_eq!(lhs.balance('o', &mut rhs), None); + assert_eq!( + rhs.to_string(), + "[ node1 a node2 b node3 o node11 p node12 q node13 r node14 s node15 ]" + ); + + // 2+8 elements are redistributed. + lhs = NodeData::::inner(Node(20), 'x', Node(21)); + assert_eq!(lhs.balance('y', &mut rhs), Some('o')); + assert_eq!( + lhs.to_string(), + "[ node20 x node21 y node1 a node2 b node3 ]" + ); + assert_eq!( + rhs.to_string(), + "[ node11 p node12 q node13 r node14 s node15 ]" + ); + } + + #[test] + fn leaf_balance() { + let mut lhs = NodeData::::leaf('a', SetValue()); + for i in 1..6 { + assert!(lhs.try_leaf_insert( + usize::from(i), + ('a' as u8 + i) as char, + SetValue(), + )); + } + assert_eq!(lhs.to_string(), "[ a b c d e f ]"); + + let mut rhs = NodeData::::leaf('0', SetValue()); + for i in 1..8 { + assert!(rhs.try_leaf_insert( + usize::from(i), + ('0' as u8 + i) as char, + SetValue(), + )); + } + assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]"); + + // 6+8 elements all fits in rhs. + assert_eq!(lhs.balance('0', &mut rhs), None); + assert_eq!(rhs.to_string(), "[ a b c d e f 0 1 2 3 4 5 6 7 ]"); + + assert!(lhs.try_leaf_insert(0, 'x', SetValue())); + assert!(lhs.try_leaf_insert(1, 'y', SetValue())); + assert!(lhs.try_leaf_insert(2, 'z', SetValue())); + assert_eq!(lhs.to_string(), "[ x y z ]"); + + // 3+14 elements need redistribution. + assert_eq!(lhs.balance('a', &mut rhs), Some('0')); + assert_eq!(lhs.to_string(), "[ x y z a b c d e f ]"); + assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]"); + } +} diff --git a/lib/cretonne/src/bforest/path.rs b/lib/cretonne/src/bforest/path.rs new file mode 100644 index 0000000000..2dceb805f4 --- /dev/null +++ b/lib/cretonne/src/bforest/path.rs @@ -0,0 +1,817 @@ +//! A path from the root of a B+-tree to a leaf node. + +use std::borrow::Borrow; +use std::marker::PhantomData; +use super::{Forest, Node, NodeData, NodePool, MAX_PATH, BPlusComparator, slice_insert, slice_shift}; +use super::node::Removed; + +#[cfg(test)] +use std::fmt; + +pub(super) struct Path { + /// Number of path entries including the root and leaf nodes. + size: usize, + + /// Path of node references from the root to a leaf node. + node: [Node; MAX_PATH], + + /// Entry number in each node. + entry: [u8; MAX_PATH], + + unused: PhantomData, +} + +impl Default for Path { + fn default() -> Path { + Path { + size: 0, + node: [Node(0); MAX_PATH], + entry: [0; MAX_PATH], + unused: PhantomData, + } + } +} + +impl Path { + /// Reset path by searching for `key` starting from `root`. + /// + /// If `key` is in the tree, returns the corresponding value and leaved the path pointing at + /// the entry. Otherwise returns `None` and: + /// + /// - A key smaller than all stored keys returns a path to the first entry of the first leaf. + /// - A key larger than all stored keys returns a path to one beyond the last element of the + /// last leaf. + /// - A key between the stored keys of adjacent leaf nodes returns a path to one beyond the + /// last entry of the first of the leaf nodes. + /// + pub fn find( + &mut self, + key: F::Key, + root: Node, + pool: &NodePool, + comp: &F::Comparator, + ) -> Option { + let mut node = root; + for level in 0.. { + self.size = level + 1; + self.node[level] = node; + match &pool[node] { + &NodeData::Inner { size, keys, tree } => { + // Invariant: `tree[i]` contains keys smaller than + // `keys[i]`, greater or equal to `keys[i-1]`. + let i = match comp.search(key, &keys[0..size.into()]) { + // We hit an existing key, so follow the >= branch. + Ok(i) => i + 1, + // Key is less than `keys[i]`, so follow the < branch. + Err(i) => i, + }; + self.entry[level] = i as u8; + node = tree[i]; + } + &NodeData::Leaf { size, keys, vals } => { + // For a leaf we want either the found key or an insert position. + return match comp.search(key, &keys.borrow()[0..size.into()]) { + Ok(i) => { + self.entry[level] = i as u8; + Some(vals.borrow()[i]) + } + Err(i) => { + self.entry[level] = i as u8; + None + } + }; + } + &NodeData::Free { .. } => panic!("Free {} reached from {}", node, root), + } + } + unreachable!(); + } + + /// Move this path to the next key-value pair and return it. + pub fn next(&mut self, pool: &NodePool) -> Option<(F::Key, F::Value)> { + match self.leaf_pos() { + None => return None, + Some((node, entry)) => { + let (keys, vals) = pool[node].unwrap_leaf(); + if entry + 1 < keys.len() { + self.entry[self.size - 1] += 1; + return Some((keys[entry + 1], vals[entry + 1])); + } + } + } + + // The current leaf node is exhausted. Move to the next one. + let leaf_level = self.size - 1; + self.next_node(leaf_level, pool).map(|node| { + let (keys, vals) = pool[node].unwrap_leaf(); + (keys[0], vals[0]) + }) + } + + /// Move this path to the previous key-value pair and return it. + /// + /// If the path is at the off-the-end position, go to the last key-value pair. + /// + /// If the path is already at the first key-value pair, leave it there and return `None`. + pub fn prev(&mut self, root: Node, pool: &NodePool) -> Option<(F::Key, F::Value)> { + // We use `size == 0` as a generic off-the-end position. + if self.size == 0 { + self.goto_subtree_last(0, root, pool); + let (node, entry) = self.leaf_pos().unwrap(); + let (keys, vals) = pool[node].unwrap_leaf(); + return Some((keys[entry], vals[entry])); + } + + match self.leaf_pos() { + None => return None, + Some((node, entry)) => { + if entry > 0 { + self.entry[self.size - 1] -= 1; + let (keys, vals) = pool[node].unwrap_leaf(); + return Some((keys[entry - 1], vals[entry - 1])); + } + } + } + + // The current leaf node is exhausted. Move to the previous one. + self.prev_leaf(pool).map(|node| { + let (keys, vals) = pool[node].unwrap_leaf(); + let e = self.leaf_entry(); + (keys[e], vals[e]) + }) + } + + /// Move path to the first entry of the next node at level, if one exists. + /// + /// Returns the new node if it exists. + /// + /// Reset the path to `size = 0` and return `None` if there is no next node. + fn next_node(&mut self, level: usize, pool: &NodePool) -> Option { + match self.right_sibling_branch_level(level, pool) { + None => { + self.size = 0; + None + } + Some(bl) => { + let (_, bnodes) = pool[self.node[bl]].unwrap_inner(); + self.entry[bl] += 1; + let mut node = bnodes[usize::from(self.entry[bl])]; + + for l in bl + 1..level { + self.node[l] = node; + self.entry[l] = 0; + node = pool[node].unwrap_inner().1[0]; + } + + self.node[level] = node; + self.entry[level] = 0; + Some(node) + } + } + } + + /// Move the path to the last entry of the previous leaf node, if one exists. + /// + /// Returns the new leaf node if it exists. + /// + /// Leave the path unchanged and returns `None` if we are already at the first leaf node. + fn prev_leaf(&mut self, pool: &NodePool) -> Option { + self.left_sibling_branch_level(self.size - 1).map(|bl| { + let entry = self.entry[bl] - 1; + self.entry[bl] = entry; + let (_, bnodes) = pool[self.node[bl]].unwrap_inner(); + self.goto_subtree_last(bl + 1, bnodes[usize::from(entry)], pool) + }) + } + + /// Move this path to the last position for the sub-tree at `level, root`. + fn goto_subtree_last(&mut self, level: usize, root: Node, pool: &NodePool) -> Node { + let mut node = root; + for l in level.. { + self.node[l] = node; + match &pool[node] { + &NodeData::Inner { size, ref tree, .. } => { + self.entry[l] = size; + node = tree[usize::from(size)]; + } + &NodeData::Leaf { size, .. } => { + self.entry[l] = size - 1; + self.size = l + 1; + break; + } + &NodeData::Free { .. } => panic!("Free {} reached from {}", node, root), + } + } + node + } + + /// Set the root node and point the path at the first entry of the node. + pub fn set_root_node(&mut self, root: Node) { + self.size = 1; + self.node[0] = root; + self.entry[0] = 0; + } + + /// Get the current leaf node and entry, if any. + pub fn leaf_pos(&self) -> Option<(Node, usize)> { + let i = self.size.wrapping_sub(1); + self.node.get(i).map(|&n| (n, self.entry[i].into())) + } + + /// Get the current leaf node. + fn leaf_node(&self) -> Node { + self.node[self.size - 1] + } + + /// Get the current entry in the leaf node. + fn leaf_entry(&self) -> usize { + self.entry[self.size - 1].into() + } + + /// Is this path pointing to the first entry in the tree? + /// This corresponds to the smallest key. + fn at_first_entry(&self) -> bool { + self.entry[0..self.size].iter().all(|&i| i == 0) + } + + /// Get a mutable reference to the current value. + /// This assumes that there is a current value. + pub fn value_mut<'a>(&self, pool: &'a mut NodePool) -> &'a mut F::Value { + &mut pool[self.leaf_node()].unwrap_leaf_mut().1[self.leaf_entry()] + } + + /// Insert the key-value pair at the current position. + /// The current position must be the correct insertion location for the key. + /// This function does not check for duplicate keys. Use `find` or similar for that. + /// Returns the new root node. + pub fn insert(&mut self, key: F::Key, value: F::Value, pool: &mut NodePool) -> Node { + if !self.try_leaf_insert(key, value, pool) { + self.split_and_insert(key, value, pool); + } + self.node[0] + } + + /// Try to insert `key, value` at the current position, but fail and return false if the leaf + /// node is full. + fn try_leaf_insert(&self, key: F::Key, value: F::Value, pool: &mut NodePool) -> bool { + let index = self.leaf_entry(); + + // The case `index == 0` should only ever happen when there are no earlier leaf nodes, + // otherwise we should have appended to the previous leaf node instead. This invariant + // means that we don't need to update keys stored in inner nodes here. + debug_assert!(index > 0 || self.at_first_entry()); + + pool[self.leaf_node()].try_leaf_insert(index, key, value) + } + + /// Split the current leaf node and then insert `key, value`. + /// This should only be used if `try_leaf_insert()` fails. + fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool) { + let orig_root = self.node[0]; + + // Loop invariant: We need to split the node at `level` and then retry a failed insertion. + // The items to insert are either `(key, ins_node)` or `(key, value)`. + let mut ins_node = None; + let mut split; + for level in (0..self.size).rev() { + // Split the current node. + let mut node = self.node[level]; + let mut entry = self.entry[level].into(); + split = pool[node].split(entry); + let rhs_node = pool.alloc_node(split.rhs_data); + + // Should the path be moved to the new RHS node? + // Prefer the smaller node if we're right in the middle. + // Prefer to append to LHS all other things being equal. + // + // When inserting into an inner node (`ins_node.is_some()`), we must point to a valid + // entry in the current node since the new entry is inserted *after* the insert + // location. + if entry > split.lhs_entries || + (entry == split.lhs_entries && + (split.lhs_entries > split.rhs_entries || ins_node.is_some())) + { + node = rhs_node; + entry -= split.lhs_entries; + self.node[level] = node; + self.entry[level] = entry as u8; + } + + // Now that we have a not-full node, it must be possible to insert. + match ins_node { + None => { + assert!(pool[node].try_leaf_insert(entry, key, value)); + // If we inserted at the front of the new rhs_node leaf, we need to propagate + // the inserted key as the critical key instead of the previous front key. + if entry == 0 && node == rhs_node { + split.crit_key = key; + } + } + Some(n) => { + assert!(pool[node].try_inner_insert(entry, key, n)); + // The lower level was moved to the new RHS node, so make sure that is + // reflected here. + if n == self.node[level + 1] { + self.entry[level] += 1; + } + } + } + + // We are now done with the current level, but `rhs_node` must be inserted in the inner + // node above us. If we're already at level 0, the root node needs to be split. + key = split.crit_key; + ins_node = Some(rhs_node); + if level > 0 { + let pnode = &mut pool[self.node[level - 1]]; + let pentry = self.entry[level - 1].into(); + if pnode.try_inner_insert(pentry, key, rhs_node) { + // If this level level was moved to the new RHS node, update parent entry. + if node == rhs_node { + self.entry[level - 1] += 1; + } + return; + } + } + } + + // If we get here we have split the original root node and need to add an extra level. + let rhs_node = ins_node.expect("empty path"); + let root = pool.alloc_node(NodeData::inner(orig_root, key, rhs_node)); + let entry = if self.node[0] == rhs_node { 1 } else { 0 }; + self.size += 1; + slice_insert(&mut self.node[0..self.size], 0, root); + slice_insert(&mut self.entry[0..self.size], 0, entry); + } + + /// Remove the key-value pair at the current position and advance the path to the next + /// key-value pair, leaving the path in a normalized state. + /// + /// Return the new root node. + pub fn remove(&mut self, pool: &mut NodePool) -> Option { + let e = self.leaf_entry(); + match pool[self.leaf_node()].leaf_remove(e) { + Removed::Healthy => { + if e == 0 { + self.update_crit_key(pool) + } + Some(self.node[0]) + } + status => self.balance_nodes(status, pool), + } + } + + /// Get the critical key for the current node at `level`. + /// + /// The critical key is less than or equal to all keys in the sub-tree at `level` and greater + /// than all keys to the left of the current node at `level`. + /// + /// The left-most node at any level does not have a critical key. + fn current_crit_key(&self, level: usize, pool: &NodePool) -> Option { + // Find the level containing the critical key for the current node. + self.left_sibling_branch_level(level).map(|bl| { + let (keys, _) = pool[self.node[bl]].unwrap_inner(); + keys[usize::from(self.entry[bl]) - 1] + }) + } + + /// Update the critical key after removing the front entry of the leaf node. + fn update_crit_key(&mut self, pool: &mut NodePool) { + // Find the inner level containing the critical key for the current leaf node. + let crit_level = match self.left_sibling_branch_level(self.size - 1) { + None => return, + Some(l) => l, + }; + let crit_kidx = self.entry[crit_level] - 1; + + // Extract the new critical key from the leaf node. + let crit_key = pool[self.leaf_node()].leaf_crit_key(); + let crit_node = self.node[crit_level]; + + match &mut pool[crit_node] { + &mut NodeData::Inner { size, ref mut keys, .. } => { + debug_assert!(crit_kidx < size); + keys[usize::from(crit_kidx)] = crit_key; + } + _ => panic!("Expected inner node"), + } + } + + + /// Given that the current leaf node is in an unhealthy (underflowed or even empty) status, + /// balance it with sibling nodes. + /// + /// Return the new root node. + fn balance_nodes(&mut self, status: Removed, pool: &mut NodePool) -> Option { + // The current leaf node is not in a healthy state, and its critical key may have changed + // too. + // + // Start by dealing with a changed critical key for the leaf level. + if status != Removed::Empty && self.leaf_entry() == 0 { + self.update_crit_key(pool); + } + + let leaf_level = self.size - 1; + if self.heal_level(status, leaf_level, pool) { + // Tree has become empty. + self.size = 0; + return None; + } + + // Discard the root node if it has shrunk to a single sub-tree. + let mut ns = 0; + while let &NodeData::Inner { size: 0, ref tree, .. } = &pool[self.node[ns]] { + ns += 1; + self.node[ns] = tree[0]; + } + + if ns > 0 { + for l in 0..ns { + pool.free_node(self.node[l]); + } + + // Shift the whole array instead of just 0..size because `self.size` may be cleared + // here if the path is pointing off-the-end. + slice_shift(&mut self.node, ns); + slice_shift(&mut self.entry, ns); + + if self.size > 0 { + self.size -= ns; + } + } + + // Return the root node, even when `size=0` indicating that we're at the off-the-end + // position. + Some(self.node[0]) + } + + /// After removing an entry from the node at `level`, check its health and rebalance as needed. + /// + /// Leave the path up to and including `level` in a normalized state where all entries are in + /// bounds. + /// + /// Returns true if the tree becomes empty. + fn heal_level(&mut self, status: Removed, level: usize, pool: &mut NodePool) -> bool { + match status { + Removed::Healthy => {} + Removed::Rightmost => { + // The rightmost entry was removed from the curent node, so move the path so it + // points at the first entry of the next node at this level. + debug_assert_eq!( + usize::from(self.entry[level]), + pool[self.node[level]].entries() + ); + self.next_node(level, pool); + } + Removed::Underflow => self.underflowed_node(level, pool), + Removed::Empty => return self.empty_node(level, pool), + } + false + } + + /// The current node at `level` has underflowed, meaning that it is below half capacity but + /// not completely empty. + /// + /// Handle this by balancing entries with the right sibling node. + /// + /// Leave the path up to and including `level` in a valid state that points to the same entry. + fn underflowed_node(&mut self, level: usize, pool: &mut NodePool) { + // Look for a right sibling node at this level. If none exists, we allow the underflowed + // node to persist as the right-most node at its level. + if let Some((crit_key, rhs_node)) = self.right_sibling(level, pool) { + // New critical key for the updated right sibling node. + let new_ck: Option; + let empty; + // Make a COPY of the sibling node to avoid fighting the borrow checker. + let mut rhs = pool[rhs_node]; + match pool[self.node[level]].balance(crit_key, &mut rhs) { + None => { + // Everything got moved to the RHS node. + new_ck = self.current_crit_key(level, pool); + empty = true; + } + Some(key) => { + // Entries moved from RHS node. + new_ck = Some(key); + empty = false; + } + } + // Put back the updated RHS node data. + pool[rhs_node] = rhs; + // Update the critical key for the RHS node unless it has become a left-most + // node. + if let Some(ck) = new_ck { + self.update_right_crit_key(level, ck, pool); + } + if empty { + let empty_tree = self.empty_node(level, pool); + debug_assert!(!empty_tree); + } + + // Any Removed::Rightmost state must have been cleared above by merging nodes. If the + // current entry[level] was one off the end of the node, it will now point at a proper + // entry. + debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries()); + } else { + // There's no right sibling at this level, so the node can't be rebalanced. + // Check if we are in an off-the-end position. + if usize::from(self.entry[level]) >= pool[self.node[level]].entries() { + self.size = 0; + } + } + } + + /// The current node at `level` has become empty. + /// + /// Remove the node from its parent node and leave the path in a normalized state. This means + /// that the path at this level will go through the right sibling of this node. + /// + /// If the current node has no right sibling, set `self.size = 0`. + /// + /// Returns true if the tree becomes empty. + fn empty_node(&mut self, level: usize, pool: &mut NodePool) -> bool { + pool.free_node(self.node[level]); + if level == 0 { + // We just deleted the root node, so the tree is now empty. + return true; + } + + // Get the right sibling node before recursively removing nodes. + let rhs_node = self.right_sibling(level, pool).map(|(_, n)| n); + + // Remove the current sub-tree from the parent node. + let pl = level - 1; + let pe = self.entry[pl].into(); + let status = pool[self.node[pl]].inner_remove(pe); + self.heal_level(status, pl, pool); + + // Finally update the path at this level. + match rhs_node { + // We'll leave `self.entry[level]` unchanged. It can be non-zero after moving node + // entries to the right sibling node. + Some(rhs) => self.node[level] = rhs, + // We have no right sibling, so we must have deleted the right-most + // entry. The path should be moved to the "off-the-end" position. + None => self.size = 0, + } + false + } + + /// Find the level where the right sibling to the current node at `level` branches off. + /// + /// This will be an inner node with two adjacent sub-trees: In one the current node at level is + /// a right-most node, in the other, the right sibling is a left-most node. + /// + /// Returns `None` if the current node is a right-most node so no right sibling exists. + fn right_sibling_branch_level(&self, level: usize, pool: &NodePool) -> Option { + (0..level).rposition(|l| match &pool[self.node[l]] { + &NodeData::Inner { size, .. } => self.entry[l] < size, + _ => panic!("Expected inner node"), + }) + } + + /// Find the level where the left sibling to the current node at `level` branches off. + fn left_sibling_branch_level(&self, level: usize) -> Option { + self.entry[0..level].iter().rposition(|&e| e != 0) + } + + /// Get the right sibling node to the current node at `level`. + /// Also return the critical key between the current node and the right sibling. + fn right_sibling(&self, level: usize, pool: &NodePool) -> Option<(F::Key, Node)> { + // Find the critical level: The deepest level where two sibling subtrees contain the + // current node and its right sibling. + self.right_sibling_branch_level(level, pool).map(|bl| { + // Extract the critical key and the `bl+1` node. + let be = usize::from(self.entry[bl]); + let crit_key; + let mut node; + { + let (keys, tree) = pool[self.node[bl]].unwrap_inner(); + crit_key = keys[be]; + node = tree[be + 1]; + } + + // Follow left-most links back down to `level`. + for _ in bl + 1..level { + node = pool[node].unwrap_inner().1[0]; + } + + (crit_key, node) + }) + } + + /// Update the critical key for the right sibling node at `level`. + fn update_right_crit_key(&self, level: usize, crit_key: F::Key, pool: &mut NodePool) { + let bl = self.right_sibling_branch_level(level, pool).expect( + "No right sibling exists", + ); + match &mut pool[self.node[bl]] { + &mut NodeData::Inner { ref mut keys, .. } => { + keys[usize::from(self.entry[bl])] = crit_key; + } + _ => panic!("Expected inner node"), + } + } + + /// Normalize the path position such that it is either pointing at a real entry or `size=0` + /// indicating "off-the-end". + pub fn normalize(&mut self, pool: &mut NodePool) { + if let Some((leaf, entry)) = self.leaf_pos() { + if entry >= pool[leaf].entries() { + let leaf_level = self.size - 1; + self.next_node(leaf_level, pool); + } + } + } +} + +#[cfg(test)] +impl Path { + /// Check the internal consistency of this path. + pub fn verify(&self, pool: &NodePool) { + for level in 0..self.size { + match &pool[self.node[level]] { + &NodeData::Inner { size, tree, .. } => { + assert!( + level < self.size - 1, + "Expected leaf node at level {}", + level + ); + assert!( + self.entry[level] <= size, + "OOB inner entry {}/{} at level {}", + self.entry[level], + size, + level + ); + assert_eq!( + self.node[level + 1], + tree[usize::from(self.entry[level])], + "Node mismatch at level {}", + level + ); + } + &NodeData::Leaf { size, .. } => { + assert_eq!(level, self.size - 1, "Expected inner node"); + assert!( + self.entry[level] <= size, + "OOB leaf entry {}/{}", + self.entry[level], + size, + ); + } + &NodeData::Free { .. } => { + panic!("Free {} in path", self.node[level]); + } + } + } + } +} + +#[cfg(test)] +impl fmt::Display for Path { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.size == 0 { + write!(f, "") + } else { + write!(f, "{}[{}]", self.node[0], self.entry[0])?; + for i in 1..self.size { + write!(f, "--{}[{}]", self.node[i], self.entry[i])?; + } + Ok(()) + } + } +} + +#[cfg(test)] +mod test { + use std::cmp::Ordering; + use super::*; + use super::super::{Forest, NodePool, NodeData}; + + struct TC(); + + impl BPlusComparator for TC { + fn cmp(&self, a: i32, b: i32) -> Ordering { + a.cmp(&b) + } + } + + struct TF(); + + impl Forest for TF { + type Key = i32; + type Value = char; + type LeafKeys = [i32; 7]; + type LeafValues = [char; 7]; + type Comparator = TC; + + fn splat_key(key: Self::Key) -> Self::LeafKeys { + [key; 7] + } + + fn splat_value(value: Self::Value) -> Self::LeafValues { + [value; 7] + } + } + + #[test] + fn search_single_leaf() { + // Testing Path::new() for trees with a single leaf node. + let mut pool = NodePool::::new(); + let root = pool.alloc_node(NodeData::leaf(10, 'a')); + let mut p = Path::default(); + let comp = TC(); + + // Search for key less than stored key. + assert_eq!(p.find(5, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + + // Search for stored key. + assert_eq!(p.find(10, root, &pool, &comp), Some('a')); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + + // Search for key greater than stored key. + assert_eq!(p.find(15, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + + // Modify leaf node to contain two values. + match pool[root] { + NodeData::Leaf { + ref mut size, + ref mut keys, + ref mut vals, + } => { + *size = 2; + keys[1] = 20; + vals[1] = 'b'; + } + _ => unreachable!(), + } + + // Search for key between stored keys. + assert_eq!(p.find(15, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + + // Search for key greater than stored keys. + assert_eq!(p.find(25, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 2); + } + + #[test] + fn search_single_inner() { + // Testing Path::new() for trees with a single inner node and two leaves. + let mut pool = NodePool::::new(); + let leaf1 = pool.alloc_node(NodeData::leaf(10, 'a')); + let leaf2 = pool.alloc_node(NodeData::leaf(20, 'b')); + let root = pool.alloc_node(NodeData::inner(leaf1, 20, leaf2)); + let mut p = Path::default(); + let comp = TC(); + + // Search for key less than stored keys. + assert_eq!(p.find(5, root, &pool, &comp), None); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + assert_eq!(p.node[1], leaf1); + assert_eq!(p.entry[1], 0); + + assert_eq!(p.find(10, root, &pool, &comp), Some('a')); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + assert_eq!(p.node[1], leaf1); + assert_eq!(p.entry[1], 0); + + // Midway between the two leaf nodes. + assert_eq!(p.find(15, root, &pool, &comp), None); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + assert_eq!(p.node[1], leaf1); + assert_eq!(p.entry[1], 1); + + assert_eq!(p.find(20, root, &pool, &comp), Some('b')); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + assert_eq!(p.node[1], leaf2); + assert_eq!(p.entry[1], 0); + + assert_eq!(p.find(25, root, &pool, &comp), None); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + assert_eq!(p.node[1], leaf2); + assert_eq!(p.entry[1], 1); + } +} diff --git a/lib/cretonne/src/bforest/pool.rs b/lib/cretonne/src/bforest/pool.rs new file mode 100644 index 0000000000..2dfcb35074 --- /dev/null +++ b/lib/cretonne/src/bforest/pool.rs @@ -0,0 +1,197 @@ +//! B+-tree node pool. + +use entity::PrimaryMap; +use std::ops::{Index, IndexMut}; +use super::{Forest, Node, NodeData}; + +/// A pool of nodes, including a free list. +pub(super) struct NodePool { + nodes: PrimaryMap>, + freelist: Option, +} + +impl NodePool { + /// Allocate a new empty pool of nodes. + pub fn new() -> NodePool { + NodePool { + nodes: PrimaryMap::new(), + freelist: None, + } + } + + /// Free all nodes. + pub fn clear(&mut self) { + self.nodes.clear(); + self.freelist = None; + } + + /// Allocate a new node containing `data`. + pub fn alloc_node(&mut self, data: NodeData) -> Node { + debug_assert!(!data.is_free(), "can't allocate free node"); + match self.freelist { + Some(node) => { + // Remove this node from the free list. + match self.nodes[node] { + NodeData::Free { next } => self.freelist = next, + _ => panic!("Invalid {} on free list", node), + } + self.nodes[node] = data; + node + } + None => { + // The free list is empty. Allocate a new node. + self.nodes.push(data) + } + } + } + + /// Free a node. + pub fn free_node(&mut self, node: Node) { + // Quick check for a double free. + debug_assert!(!self.nodes[node].is_free(), "{} is already free", node); + self.nodes[node] = NodeData::Free { next: self.freelist }; + self.freelist = Some(node); + } +} + +#[cfg(test)] +impl NodePool { + /// Verify the consistency of the tree rooted at `node`. + pub fn verify_tree(&self, node: Node, comp: &F::Comparator) + where + NodeData: ::std::fmt::Display, + F::Key: ::std::fmt::Display, + { + use std::borrow::Borrow; + use std::cmp::Ordering; + use super::BPlusComparator; + use entity::SparseSet; + + // The root node can't be an inner node with just a single sub-tree. It should have been + // pruned. + if let &NodeData::Inner { size, .. } = &self[node] { + assert!(size > 0, "Root must have more than one sub-tree"); + } + + let mut done = SparseSet::new(); + let mut todo = Vec::new(); + + // Todo-list entries are: + // 1. Optional LHS key which must be <= all node entries. + // 2. The node reference. + // 3. Optional RHS key which must be > all node entries. + todo.push((None, node, None)); + + while let Some((lkey, node, rkey)) = todo.pop() { + assert_eq!( + done.insert(node), + None, + "Node appears more than once in tree" + ); + let mut lower = lkey; + + match &self[node] { + &NodeData::Inner { size, keys, tree } => { + let size = size as usize; + let capacity = tree.len(); + let keys = &keys[0..size]; + + // Verify occupancy. + // Right-most nodes can be small, but others must be at least half full. + assert!( + rkey.is_none() || (size + 1) * 2 >= capacity, + "Only {}/{} entries in {}:{}, upper={}", + size + 1, + capacity, + node, + self[node], + rkey.unwrap() + ); + + // Queue up the sub-trees, checking for duplicates. + for i in 0..size + 1 { + // Get an upper bound for node[i]. + let upper = keys.get(i).cloned().or(rkey); + + // Check that keys are strictly monotonic. + if let (Some(a), Some(b)) = (lower, upper) { + assert_eq!( + comp.cmp(a, b), + Ordering::Less, + "Key order {} < {} failed in {}: {}", + a, + b, + node, + self[node] + ); + } + + // Queue up the sub-tree. + todo.push((lower, tree[i], upper)); + + // Set a lower bound for the next tree. + lower = upper; + } + } + &NodeData::Leaf { size, keys, .. } => { + let size = size as usize; + let capacity = keys.borrow().len(); + let keys = &keys.borrow()[0..size]; + + // Verify occupancy. + // Right-most nodes can be small, but others must be at least half full. + assert!(size > 0, "Leaf {} is empty", node); + assert!( + rkey.is_none() || size * 2 >= capacity, + "Only {}/{} entries in {}:{}, upper={}", + size, + capacity, + node, + self[node], + rkey.unwrap() + ); + + for i in 0..size + 1 { + let upper = keys.get(i).cloned().or(rkey); + + // Check that keys are strictly monotonic. + if let (Some(a), Some(b)) = (lower, upper) { + let wanted = if i == 0 { + Ordering::Equal + } else { + Ordering::Less + }; + assert_eq!( + comp.cmp(a, b), + wanted, + "Key order for {} - {} failed in {}: {}", + a, + b, + node, + self[node] + ); + } + + // Set a lower bound for the next key. + lower = upper; + } + } + &NodeData::Free { .. } => panic!("Free {} reached", node), + } + } + } +} + +impl Index for NodePool { + type Output = NodeData; + + fn index(&self, index: Node) -> &Self::Output { + self.nodes.index(index) + } +} + +impl IndexMut for NodePool { + fn index_mut(&mut self, index: Node) -> &mut Self::Output { + self.nodes.index_mut(index) + } +} diff --git a/lib/cretonne/src/bforest/set.rs b/lib/cretonne/src/bforest/set.rs new file mode 100644 index 0000000000..e768ab64b9 --- /dev/null +++ b/lib/cretonne/src/bforest/set.rs @@ -0,0 +1,486 @@ +//! Forest of sets. + +use packed_option::PackedOption; +use std::marker::PhantomData; +use super::{INNER_SIZE, BPlusComparator, Forest, NodePool, Node, NodeData, Path, SetValue}; + +/// Tag type defining forest types for a set. +struct SetTypes(PhantomData<(K, C)>); + +impl Forest for SetTypes +where + K: Copy, + C: BPlusComparator, +{ + type Key = K; + type Value = SetValue; + type LeafKeys = [K; 2 * INNER_SIZE - 1]; + type LeafValues = [SetValue; 2 * INNER_SIZE - 1]; + type Comparator = C; + + fn splat_key(key: Self::Key) -> Self::LeafKeys { + [key; 2 * INNER_SIZE - 1] + } + + fn splat_value(value: Self::Value) -> Self::LeafValues { + [value; 2 * INNER_SIZE - 1] + } +} + +/// Memory pool for a forest of `BPlusSet` instances. +pub struct SetForest +where + K: Copy, + C: BPlusComparator, +{ + nodes: NodePool>, +} + +impl SetForest +where + K: Copy, + C: BPlusComparator, +{ + /// Create a new empty forest. + pub fn new() -> SetForest { + SetForest { nodes: NodePool::new() } + } + + /// Clear all sets in the forest. + /// + /// All `BPlusSet` instances belong to this forest are invalidated and should no longer be used. + pub fn clear(&mut self) { + self.nodes.clear(); + } +} + +/// B-tree representing an ordered set of `K`s using `C` for comparing elements. +/// +/// This is not a general-purpose replacement for `BTreeSet`. See the [module +/// documentation](index.html) for more information about design tradeoffs. +pub struct BPlusSet +where + K: Copy, + C: BPlusComparator, +{ + root: PackedOption, + unused: PhantomData<(K, C)>, +} + +impl BPlusSet +where + K: Copy, + C: BPlusComparator, +{ + /// Make an empty set. + pub fn new() -> BPlusSet { + BPlusSet { + root: None.into(), + unused: PhantomData, + } + } + + /// Is this an empty set? + pub fn is_empty(&self) -> bool { + self.root.is_none() + } + + /// Does the set contain `key`?. + pub fn contains(&self, key: K, forest: &SetForest, comp: &C) -> bool { + self.root + .expand() + .and_then(|root| Path::default().find(key, root, &forest.nodes, comp)) + .is_some() + } + + /// Try to insert `key` into the set. + /// + /// If the set did not contain `key`, insert it and return true. + /// + /// If `key` is already present, don't change the set and return false. + pub fn insert(&mut self, key: K, forest: &mut SetForest, comp: &C) -> bool { + self.cursor(forest, comp).insert(key) + } + + /// Remove `key` from the set and return true. + /// + /// If `key` was not present in the set, return false. + pub fn remove(&mut self, key: K, forest: &mut SetForest, comp: &C) -> bool { + let mut c = self.cursor(forest, comp); + if c.goto(key) { + c.remove(); + true + } else { + false + } + } + + /// Create a cursor for navigating this set. The cursor is initially positioned off the end of + /// the set. + pub fn cursor<'a>( + &'a mut self, + forest: &'a mut SetForest, + comp: &'a C, + ) -> SetCursor<'a, K, C> { + SetCursor::new(self, forest, comp) + } +} + +/// A position in a `BPlusSet` used to navigate and modify the ordered set. +/// +/// A cursor always points at an element in the set, or "off the end" which is a position after the +/// last element in the set. +pub struct SetCursor<'a, K, C> +where + K: 'a + Copy, + C: 'a + BPlusComparator, +{ + root: &'a mut PackedOption, + pool: &'a mut NodePool>, + comp: &'a C, + path: Path>, +} + +impl<'a, K, C> SetCursor<'a, K, C> +where + K: Copy, + C: BPlusComparator, +{ + /// Create a cursor with a default (invalid) location. + fn new( + container: &'a mut BPlusSet, + forest: &'a mut SetForest, + comp: &'a C, + ) -> SetCursor<'a, K, C> { + SetCursor { + root: &mut container.root, + pool: &mut forest.nodes, + comp, + path: Path::default(), + } + } + + /// Is this cursor pointing to an empty set? + pub fn is_empty(&self) -> bool { + self.root.is_none() + } + + /// Move cursor to the next element and return it. + /// + /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end + /// position. + pub fn next(&mut self) -> Option { + self.path.next(self.pool).map(|(k, _)| k) + } + + /// Move cursor to the previous element and return it. + /// + /// If the cursor is already pointing at the first element, leave it there and return `None`. + pub fn prev(&mut self) -> Option { + self.root.expand().and_then(|root| { + self.path.prev(root, self.pool).map(|(k, _)| k) + }) + } + + /// Get the current element, or `None` if the cursor is at the end. + pub fn elem(&self) -> Option { + self.path.leaf_pos().and_then(|(node, entry)| { + self.pool[node].unwrap_leaf().0.get(entry).cloned() + }) + } + + /// Move this cursor to `elem`. + /// + /// If `elem` is in the set, place the cursor at `elem` and return true. + /// + /// If `elem` is not in the set, place the cursor at the next larger element (or the end) and + /// return false. + pub fn goto(&mut self, elem: K) -> bool { + match self.root.expand() { + None => false, + Some(root) => { + if self.path.find(elem, root, self.pool, self.comp).is_some() { + true + } else { + self.path.normalize(self.pool); + false + } + } + } + } + + /// Try to insert `elem` into the set and leave the cursor at the inserted element. + /// + /// If the set did not contain `elem`, insert it and return true. + /// + /// If `elem` is already present, don't change the set, place the cursor at `goto(elem)`, and + /// return false. + pub fn insert(&mut self, elem: K) -> bool { + match self.root.expand() { + None => { + let root = self.pool.alloc_node(NodeData::leaf(elem, SetValue())); + *self.root = root.into(); + self.path.set_root_node(root); + true + } + Some(root) => { + // TODO: Optimize the case where `self.path` is already at the correct insert pos. + if self.path.find(elem, root, self.pool, self.comp).is_none() { + *self.root = self.path.insert(elem, SetValue(), self.pool).into(); + true + } else { + false + } + } + } + } + + /// Remove the current element (if any) and return it. + /// This advances the cursor to the next element after the removed one. + pub fn remove(&mut self) -> Option { + let elem = self.elem(); + if elem.is_some() { + *self.root = self.path.remove(self.pool).into(); + } + elem + } +} + +#[cfg(test)] +impl<'a, K, C> SetCursor<'a, K, C> +where + K: Copy + ::std::fmt::Display, + C: BPlusComparator, +{ + fn verify(&self) { + self.path.verify(self.pool); + self.root.map(|root| self.pool.verify_tree(root, self.comp)); + } + + /// Get a text version of the path to the current position. + fn tpath(&self) -> String { + self.path.to_string() + } +} + +#[cfg(test)] +mod test { + use std::mem; + use super::*; + use super::super::NodeData; + + #[test] + fn node_size() { + // check that nodes are cache line sized when keys are 32 bits. + type F = SetTypes; + assert_eq!(mem::size_of::>(), 64); + } + + #[test] + fn empty() { + let mut f = SetForest::::new(); + f.clear(); + + let mut s = BPlusSet::::new(); + assert!(s.is_empty()); + assert!(!s.contains(7, &f, &())); + + let c = SetCursor::new(&mut s, &mut f, &()); + c.verify(); + assert_eq!(c.elem(), None); + } + + #[test] + fn simple_cursor() { + let mut f = SetForest::::new(); + let mut s = BPlusSet::::new(); + let mut c = SetCursor::new(&mut s, &mut f, &()); + + assert!(c.insert(50)); + c.verify(); + assert_eq!(c.elem(), Some(50)); + + assert!(c.insert(100)); + c.verify(); + assert_eq!(c.elem(), Some(100)); + + assert!(c.insert(10)); + c.verify(); + assert_eq!(c.elem(), Some(10)); + + // Basic movement. + assert_eq!(c.next(), Some(50)); + assert_eq!(c.next(), Some(100)); + assert_eq!(c.next(), None); + assert_eq!(c.next(), None); + assert_eq!(c.prev(), Some(100)); + assert_eq!(c.prev(), Some(50)); + assert_eq!(c.prev(), Some(10)); + assert_eq!(c.prev(), None); + assert_eq!(c.prev(), None); + + assert!(c.goto(50)); + assert_eq!(c.elem(), Some(50)); + assert_eq!(c.remove(), Some(50)); + c.verify(); + + assert_eq!(c.elem(), Some(100)); + assert_eq!(c.remove(), Some(100)); + c.verify(); + assert_eq!(c.elem(), None); + assert_eq!(c.remove(), None); + c.verify(); + } + + #[test] + fn two_level_sparse_tree() { + let mut f = SetForest::::new(); + let mut s = BPlusSet::::new(); + let mut c = SetCursor::new(&mut s, &mut f, &()); + + // Insert enough elements that we get a two-level tree. + // Each leaf node holds 8 elements + assert!(c.is_empty()); + for i in 0..50 { + assert!(c.insert(i)); + assert_eq!(c.elem(), Some(i)); + } + assert!(!c.is_empty()); + + assert!(c.goto(0)); + assert_eq!(c.tpath(), "node2[0]--node0[0]"); + + assert_eq!(c.prev(), None); + for i in 1..50 { + assert_eq!(c.next(), Some(i)); + } + assert_eq!(c.next(), None); + for i in (0..50).rev() { + assert_eq!(c.prev(), Some(i)); + } + assert_eq!(c.prev(), None); + + assert!(c.goto(25)); + for i in 25..50 { + assert_eq!(c.remove(), Some(i)); + assert!(!c.is_empty()); + c.verify(); + } + + for i in (0..25).rev() { + assert!(!c.is_empty()); + assert_eq!(c.elem(), None); + assert_eq!(c.prev(), Some(i)); + assert_eq!(c.remove(), Some(i)); + c.verify(); + } + assert_eq!(c.elem(), None); + assert!(c.is_empty()); + } + + #[test] + fn three_level_sparse_tree() { + let mut f = SetForest::::new(); + let mut s = BPlusSet::::new(); + let mut c = SetCursor::new(&mut s, &mut f, &()); + + // Insert enough elements that we get a 3-level tree. + // Each leaf node holds 8 elements when filled up sequentially. + // Inner nodes hold 8 node pointers. + assert!(c.is_empty()); + for i in 0..150 { + assert!(c.insert(i)); + assert_eq!(c.elem(), Some(i)); + } + assert!(!c.is_empty()); + + assert!(c.goto(0)); + assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]"); + + assert_eq!(c.prev(), None); + for i in 1..150 { + assert_eq!(c.next(), Some(i)); + } + assert_eq!(c.next(), None); + for i in (0..150).rev() { + assert_eq!(c.prev(), Some(i)); + } + assert_eq!(c.prev(), None); + + assert!(c.goto(125)); + for i in 125..150 { + assert_eq!(c.remove(), Some(i)); + assert!(!c.is_empty()); + c.verify(); + } + + for i in (0..125).rev() { + assert!(!c.is_empty()); + assert_eq!(c.elem(), None); + assert_eq!(c.prev(), Some(i)); + assert_eq!(c.remove(), Some(i)); + c.verify(); + } + assert_eq!(c.elem(), None); + assert!(c.is_empty()); + } + + // Generate a densely populated 4-level tree. + // + // Level 1: 1 root + // Level 2: 8 inner + // Level 3: 64 inner + // Level 4: 512 leafs, up to 7680 elements + // + // A 3-level tree can hold at most 960 elements. + fn dense4l(f: &mut SetForest) -> BPlusSet { + f.clear(); + let mut s = BPlusSet::new(); + + // Insert 400 elements in 7 passes over the range to avoid the half-full leaf node pattern + // that comes from sequential insertion. This will generate a normal leaf layer. + for n in 0..4000 { + assert!(s.insert((n * 7) % 4000, f, &())); + } + s + } + + #[test] + fn four_level() { + let mut f = SetForest::::new(); + let mut s = dense4l(&mut f); + let mut c = s.cursor(&mut f, &()); + + c.verify(); + + // Peel off a whole sub-tree of the root by deleting from the front. + // The 900 element is near the front of the second sub-tree. + assert!(c.goto(900)); + assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]"); + assert!(c.goto(0)); + for i in 0..900 { + assert!(!c.is_empty()); + assert_eq!(c.remove(), Some(i)); + } + c.verify(); + assert_eq!(c.elem(), Some(900)); + + // Delete backwards from somewhere in the middle. + assert!(c.goto(3000)); + for i in (2000..3000).rev() { + assert_eq!(c.prev(), Some(i)); + assert_eq!(c.remove(), Some(i)); + assert_eq!(c.elem(), Some(3000)); + } + c.verify(); + + // Remove everything in a scattered manner, triggering many collapsing patterns. + for i in 0..4000 { + if c.goto((i * 7) % 4000) { + c.remove(); + } + } + assert!(c.is_empty()); + } + +} diff --git a/lib/cretonne/src/lib.rs b/lib/cretonne/src/lib.rs index 6fbc651218..5de3c1dba3 100644 --- a/lib/cretonne/src/lib.rs +++ b/lib/cretonne/src/lib.rs @@ -15,6 +15,7 @@ pub mod dbg; #[macro_use] pub mod entity; +pub mod bforest; pub mod binemit; pub mod bitset; pub mod cursor;