Forests of B+ trees.

Add new ordered set and map data structures based on B+ trees. These are not general-purpose data structures like the `BTreeSet` and `BTreeMap` types in the standard library. They are specialized in the following ways:

- Keys and values are small `Copy` types, optimized for 32-bit entities.
- Each set or map has a very small footprint, using only 32 bits of memory when empty.
- Keys are compared using a borrowed comparator object, which can provide context for comparing tiny types that don't contain enough information to implement `Ord`.
- A whole forest of B+ trees can be cleared in constant time without having to traverse the whole data structure.
2017-10-24 09:44:23 -07:00
parent 94921c0b74
commit aa6f5c0db3
7 changed files with 3239 additions and 0 deletions
--- a/lib/cretonne/src/bforest/set.rs
+++ b/lib/cretonne/src/bforest/set.rs
@@ -0,0 +1,486 @@
+//! Forest of sets.
+
+use packed_option::PackedOption;
+use std::marker::PhantomData;
+use super::{INNER_SIZE, BPlusComparator, Forest, NodePool, Node, NodeData, Path, SetValue};
+
+/// Type-level tag implementing `Forest` for sets of `K` compared with `C`; it holds no data.
+struct SetTypes<K, C>(PhantomData<(K, C)>);
+
+impl<K: Copy, C: BPlusComparator<K>> Forest for SetTypes<K, C> {
+    type Key = K;
+    type Value = SetValue;
+    type Comparator = C;
+
+    // Both leaf arrays have `2 * INNER_SIZE - 1` slots.
+    type LeafKeys = [K; 2 * INNER_SIZE - 1];
+    type LeafValues = [SetValue; 2 * INNER_SIZE - 1];
+
+    /// Build a leaf key array with every slot set to `key`.
+    fn splat_key(key: K) -> [K; 2 * INNER_SIZE - 1] {
+        [key; 2 * INNER_SIZE - 1]
+    }
+
+    /// Build a leaf value array with every slot set to `value`.
+    fn splat_value(value: SetValue) -> [SetValue; 2 * INNER_SIZE - 1] {
+        [value; 2 * INNER_SIZE - 1]
+    }
+}
+
+/// Memory pool holding the nodes of every `BPlusSet` that is used with this forest.
+pub struct SetForest<K, C>
+where
+    K: Copy,
+    C: BPlusComparator<K>,
+{
+    nodes: NodePool<SetTypes<K, C>>, // Node storage that set lookups and cursors borrow.
+}
+
+impl<K: Copy, C: BPlusComparator<K>> SetForest<K, C> {
+    /// Construct a forest backed by a freshly created node pool.
+    pub fn new() -> Self {
+        // The node pool is the only state a forest carries.
+        let nodes = NodePool::new();
+        SetForest { nodes }
+    }
+
+    /// Reset the node pool, discarding every set stored in this forest.
+    ///
+    /// Any `BPlusSet` that belonged to this forest is invalidated by this call and must not be
+    /// used afterwards.
+    pub fn clear(&mut self) {
+        let pool = &mut self.nodes;
+        pool.clear();
+    }
+}
+
+/// B+ tree representing an ordered set of `K`s, using a `C` comparator to order the elements.
+///
+/// Nodes live in the `SetForest` passed to each method. This is not a general-purpose `BTreeSet`
+/// replacement; see the [module documentation](index.html) for the design tradeoffs.
+pub struct BPlusSet<K, C>
+where
+    K: Copy,
+    C: BPlusComparator<K>,
+{
+    root: PackedOption<Node>, // Root node of the tree; `None` while the set is empty.
+    unused: PhantomData<(K, C)>, // Ties the otherwise unused `K` and `C` parameters to the type.
+}
+
+impl<K: Copy, C: BPlusComparator<K>> BPlusSet<K, C> {
+    /// Create a set with no elements; its root stays `None` until the first insertion.
+    pub fn new() -> Self {
+        BPlusSet {
+            unused: PhantomData,
+            root: None.into(),
+        }
+    }
+
+    /// Return true when the set has no root node, which is how an empty set is represented
+    /// (see `new`).
+    pub fn is_empty(&self) -> bool {
+        self.root.expand().is_none()
+    }
+
+    /// Check whether `key` is a member of the set.
+    ///
+    /// The lookup walks a temporary path through the nodes in `forest`, ordering keys with `comp`.
+    pub fn contains(&self, key: K, forest: &SetForest<K, C>, comp: &C) -> bool {
+        match self.root.expand() {
+            Some(root) => Path::default().find(key, root, &forest.nodes, comp).is_some(),
+            None => false,
+        }
+    }
+
+    /// Add `key` to the set unless it is already a member.
+    ///
+    /// Returns true if `key` was newly inserted, and false if it was already present, in which
+    /// case the set is left unchanged.
+    pub fn insert(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
+        let mut cursor = self.cursor(forest, comp);
+        cursor.insert(key)
+    }
+
+    /// Delete `key` from the set if it is a member.
+    ///
+    /// Returns true if `key` was found and removed, and false if it was not in the set.
+    pub fn remove(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
+        let mut cursor = self.cursor(forest, comp);
+        let found = cursor.goto(key);
+        if found {
+            cursor.remove();
+        }
+        found
+    }
+
+    /// Build a cursor for walking and editing this set.
+    ///
+    /// The new cursor starts at the off-the-end position and borrows the set, `forest`, and
+    /// `comp` for its whole lifetime.
+    pub fn cursor<'a>(
+        &'a mut self,
+        forest: &'a mut SetForest<K, C>,
+        comp: &'a C,
+    ) -> SetCursor<'a, K, C> {
+        SetCursor::new(self, forest, comp)
+    }
+}
+
+/// A position in a `BPlusSet` used to navigate and modify the ordered set.
+///
+/// A cursor always points either at an element in the set or "off the end", which is the
+/// position after the last element in the set.
+pub struct SetCursor<'a, K, C>
+where
+    K: 'a + Copy,
+    C: 'a + BPlusComparator<K>,
+{
+    root: &'a mut PackedOption<Node>, // Root slot of the set being navigated.
+    pool: &'a mut NodePool<SetTypes<K, C>>, // Node pool of the forest the set lives in.
+    comp: &'a C, // Comparator passed to path searches.
+    path: Path<SetTypes<K, C>>, // Current position within the tree.
+}
+
+impl<'a, K: Copy, C: BPlusComparator<K>> SetCursor<'a, K, C> {
+    /// Build a cursor over `container` and `forest` with the path in its default state.
+    fn new(
+        container: &'a mut BPlusSet<K, C>,
+        forest: &'a mut SetForest<K, C>,
+        comp: &'a C,
+    ) -> SetCursor<'a, K, C> {
+        let root = &mut container.root;
+        let pool = &mut forest.nodes;
+        SetCursor {
+            root,
+            pool,
+            comp,
+            path: Path::default(),
+        }
+    }
+
+    /// Return true when the underlying set has no root node, i.e. it holds no elements.
+    pub fn is_empty(&self) -> bool {
+        self.root.expand().is_none()
+    }
+
+    /// Advance the cursor by one element and return the element it lands on.
+    ///
+    /// Returns `None` once the cursor runs off the end, where it then stays.
+    pub fn next(&mut self) -> Option<K> {
+        let entry = self.path.next(self.pool);
+        entry.map(|(key, _)| key)
+    }
+
+    /// Step the cursor back by one element and return the element it lands on.
+    ///
+    /// Returns `None` without moving when the cursor is already at the first element, and also
+    /// when the set is empty.
+    pub fn prev(&mut self) -> Option<K> {
+        match self.root.expand() {
+            Some(root) => self.path.prev(root, self.pool).map(|(key, _)| key),
+            None => None,
+        }
+    }
+
+    /// Return the element under the cursor, or `None` when the cursor is off the end.
+    pub fn elem(&self) -> Option<K> {
+        match self.path.leaf_pos() {
+            Some((node, entry)) => {
+                let keys = self.pool[node].unwrap_leaf().0;
+                keys.get(entry).cloned()
+            }
+            None => None,
+        }
+    }
+
+    /// Position the cursor at `elem`, or just past where `elem` would be.
+    ///
+    /// Returns true with the cursor on `elem` when it is in the set.
+    ///
+    /// Otherwise returns false with the cursor on the next larger element, or off the end if
+    /// there is none.
+    pub fn goto(&mut self, elem: K) -> bool {
+        let root = match self.root.expand() {
+            Some(root) => root,
+            None => return false,
+        };
+        let found = self.path.find(elem, root, self.pool, self.comp).is_some();
+        if !found {
+            self.path.normalize(self.pool);
+        }
+        found
+    }
+
+    /// Insert `elem` unless it is already in the set, leaving the cursor on `elem` either way.
+    ///
+    /// Returns true if `elem` was newly inserted.
+    ///
+    /// Returns false if `elem` was already present; the set is unchanged and the cursor is
+    /// placed as by `goto(elem)`.
+    pub fn insert(&mut self, elem: K) -> bool {
+        if let Some(root) = self.root.expand() {
+            // TODO: Optimize the case where `self.path` is already at the correct insert pos.
+            if self.path.find(elem, root, self.pool, self.comp).is_some() {
+                return false;
+            }
+            *self.root = self.path.insert(elem, SetValue(), self.pool).into();
+        } else {
+            // Empty set: allocate a leaf node holding `elem` and make it the root.
+            let leaf = self.pool.alloc_node(NodeData::leaf(elem, SetValue()));
+            *self.root = leaf.into();
+            self.path.set_root_node(leaf);
+        }
+        true
+    }
+
+    /// Remove the element under the cursor, if any, and return it.
+    ///
+    /// Afterwards the cursor points at the element that followed the removed one.
+    pub fn remove(&mut self) -> Option<K> {
+        let current = self.elem();
+        if current.is_none() {
+            return None;
+        }
+        *self.root = self.path.remove(self.pool).into();
+        current
+    }
+}
+
+#[cfg(test)]
+impl<'a, K: Copy + ::std::fmt::Display, C: BPlusComparator<K>> SetCursor<'a, K, C> {
+    /// Check the consistency of the cursor's path and, if the set is non-empty, of its tree.
+    fn verify(&self) {
+        self.path.verify(self.pool);
+        // Only walk the tree when there is a root; an empty set has nothing to check.
+        if let Some(root) = self.root.expand() {
+            self.pool.verify_tree(root, self.comp);
+        }
+    }
+
+    /// Render the path to the current position as text, e.g. `node2[0]--node0[0]`.
+    fn tpath(&self) -> String {
+        self.path.to_string()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::mem;
+    use super::*;
+    use super::super::NodeData;
+
+    #[test]
+    fn node_size() {
+        // Check that a node occupies exactly 64 bytes (one cache line) when keys are 32 bits.
+        type F = SetTypes<u32, ()>;
+        assert_eq!(mem::size_of::<NodeData<F>>(), 64);
+    }
+
+    #[test]
+    fn empty() {
+        let mut f = SetForest::<u32, ()>::new();
+        f.clear();
+
+        let mut s = BPlusSet::<u32, ()>::new();
+        assert!(s.is_empty());
+        assert!(!s.contains(7, &f, &()));
+
+        let c = SetCursor::new(&mut s, &mut f, &());
+        c.verify();
+        assert_eq!(c.elem(), None);
+    }
+
+    #[test]
+    fn simple_cursor() {
+        let mut f = SetForest::<u32, ()>::new();
+        let mut s = BPlusSet::<u32, ()>::new();
+        let mut c = SetCursor::new(&mut s, &mut f, &());
+
+        assert!(c.insert(50));
+        c.verify();
+        assert_eq!(c.elem(), Some(50));
+
+        assert!(c.insert(100));
+        c.verify();
+        assert_eq!(c.elem(), Some(100));
+
+        assert!(c.insert(10));
+        c.verify();
+        assert_eq!(c.elem(), Some(10));
+
+        // Basic movement.
+        assert_eq!(c.next(), Some(50));
+        assert_eq!(c.next(), Some(100));
+        assert_eq!(c.next(), None);
+        assert_eq!(c.next(), None);
+        assert_eq!(c.prev(), Some(100));
+        assert_eq!(c.prev(), Some(50));
+        assert_eq!(c.prev(), Some(10));
+        assert_eq!(c.prev(), None);
+        assert_eq!(c.prev(), None);
+
+        assert!(c.goto(50));
+        assert_eq!(c.elem(), Some(50));
+        assert_eq!(c.remove(), Some(50));
+        c.verify();
+
+        assert_eq!(c.elem(), Some(100));
+        assert_eq!(c.remove(), Some(100));
+        c.verify();
+        assert_eq!(c.elem(), None);
+        assert_eq!(c.remove(), None);
+        c.verify();
+    }
+
+    #[test]
+    fn two_level_sparse_tree() {
+        let mut f = SetForest::<u32, ()>::new();
+        let mut s = BPlusSet::<u32, ()>::new();
+        let mut c = SetCursor::new(&mut s, &mut f, &());
+
+        // Insert enough elements that we get a two-level tree.
+        // Each leaf node holds 8 elements when filled up sequentially.
+        assert!(c.is_empty());
+        for i in 0..50 {
+            assert!(c.insert(i));
+            assert_eq!(c.elem(), Some(i));
+        }
+        assert!(!c.is_empty());
+
+        assert!(c.goto(0));
+        assert_eq!(c.tpath(), "node2[0]--node0[0]");
+
+        assert_eq!(c.prev(), None);
+        for i in 1..50 {
+            assert_eq!(c.next(), Some(i));
+        }
+        assert_eq!(c.next(), None);
+        for i in (0..50).rev() {
+            assert_eq!(c.prev(), Some(i));
+        }
+        assert_eq!(c.prev(), None);
+
+        assert!(c.goto(25));
+        for i in 25..50 {
+            assert_eq!(c.remove(), Some(i));
+            assert!(!c.is_empty());
+            c.verify();
+        }
+
+        for i in (0..25).rev() {
+            assert!(!c.is_empty());
+            assert_eq!(c.elem(), None);
+            assert_eq!(c.prev(), Some(i));
+            assert_eq!(c.remove(), Some(i));
+            c.verify();
+        }
+        assert_eq!(c.elem(), None);
+        assert!(c.is_empty());
+    }
+
+    #[test]
+    fn three_level_sparse_tree() {
+        let mut f = SetForest::<u32, ()>::new();
+        let mut s = BPlusSet::<u32, ()>::new();
+        let mut c = SetCursor::new(&mut s, &mut f, &());
+
+        // Insert enough elements that we get a 3-level tree.
+        // Each leaf node holds 8 elements when filled up sequentially.
+        // Inner nodes hold 8 node pointers.
+        assert!(c.is_empty());
+        for i in 0..150 {
+            assert!(c.insert(i));
+            assert_eq!(c.elem(), Some(i));
+        }
+        assert!(!c.is_empty());
+
+        assert!(c.goto(0));
+        assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]");
+
+        assert_eq!(c.prev(), None);
+        for i in 1..150 {
+            assert_eq!(c.next(), Some(i));
+        }
+        assert_eq!(c.next(), None);
+        for i in (0..150).rev() {
+            assert_eq!(c.prev(), Some(i));
+        }
+        assert_eq!(c.prev(), None);
+
+        assert!(c.goto(125));
+        for i in 125..150 {
+            assert_eq!(c.remove(), Some(i));
+            assert!(!c.is_empty());
+            c.verify();
+        }
+
+        for i in (0..125).rev() {
+            assert!(!c.is_empty());
+            assert_eq!(c.elem(), None);
+            assert_eq!(c.prev(), Some(i));
+            assert_eq!(c.remove(), Some(i));
+            c.verify();
+        }
+        assert_eq!(c.elem(), None);
+        assert!(c.is_empty());
+    }
+
+    // Generate a densely populated 4-level tree.
+    //
+    // Level 1: 1 root
+    // Level 2: 8 inner
+    // Level 3: 64 inner
+    // Level 4: 512 leaves, up to 7680 elements
+    //
+    // A 3-level tree can hold at most 960 elements.
+    fn dense4l(f: &mut SetForest<i32, ()>) -> BPlusSet<i32, ()> {
+        f.clear();
+        let mut s = BPlusSet::new();
+
+        // Insert 4000 elements in 7 passes over the range to avoid the half-full leaf node pattern
+        // that comes from sequential insertion. This will generate a normal leaf layer.
+        for n in 0..4000 {
+            assert!(s.insert((n * 7) % 4000, f, &()));
+        }
+        s
+    }
+
+    #[test]
+    fn four_level() {
+        let mut f = SetForest::<i32, ()>::new();
+        let mut s = dense4l(&mut f);
+        let mut c = s.cursor(&mut f, &());
+
+        c.verify();
+
+        // Peel off a whole sub-tree of the root by deleting from the front.
+        // Element 900 is near the front of the second sub-tree.
+        assert!(c.goto(900));
+        assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]");
+        assert!(c.goto(0));
+        for i in 0..900 {
+            assert!(!c.is_empty());
+            assert_eq!(c.remove(), Some(i));
+        }
+        c.verify();
+        assert_eq!(c.elem(), Some(900));
+
+        // Delete backwards from somewhere in the middle.
+        assert!(c.goto(3000));
+        for i in (2000..3000).rev() {
+            assert_eq!(c.prev(), Some(i));
+            assert_eq!(c.remove(), Some(i));
+            assert_eq!(c.elem(), Some(3000));
+        }
+        c.verify();
+
+        // Remove everything in a scattered manner, triggering many collapsing patterns.
+        for i in 0..4000 {
+            if c.goto((i * 7) % 4000) {
+                c.remove();
+            }
+        }
+        assert!(c.is_empty());
+    }
+
+}