wasmtime/lib/bforest/src/set.rs

//! Forest of sets.

use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
use packed_option::PackedOption;
#[cfg(test)]
use std::fmt;
use std::marker::PhantomData;
#[cfg(test)]
use std::string::String;

/// Tag type defining forest types for a set.
///
/// The struct carries no data of its own: the `PhantomData` field only ties the key type `K` and
/// the comparator type `C` to the tag so that its `Forest` implementation can name them.
struct SetTypes<K, C>(PhantomData<(K, C)>);

/// Forest parameters for sets: keys of type `K`, the placeholder `SetValue` as the value type,
/// and leaf arrays with room for `2 * INNER_SIZE - 1` entries.
impl<K, C> Forest for SetTypes<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    type Key = K;
    type Value = SetValue;
    type LeafKeys = [K; 2 * INNER_SIZE - 1];
    type LeafValues = [SetValue; 2 * INNER_SIZE - 1];
    type Comparator = C;

    /// Build a leaf key array in which every slot holds a copy of `key`.
    fn splat_key(key: K) -> [K; 2 * INNER_SIZE - 1] {
        [key; 2 * INNER_SIZE - 1]
    }

    /// Build a leaf value array in which every slot holds a copy of `value`.
    fn splat_value(value: SetValue) -> [SetValue; 2 * INNER_SIZE - 1] {
        [value; 2 * INNER_SIZE - 1]
    }
}

/// Memory pool for a forest of `Set` instances.
///
/// A `Set` does not own its tree nodes; the `Set` methods that read or modify elements take the
/// forest as an explicit argument and access the nodes through it.
pub struct SetForest<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    // Pool holding the tree nodes used by the sets of this forest.
    nodes: NodePool<SetTypes<K, C>>,
}

impl<K, C> SetForest<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Create a new empty forest.
    pub fn new() -> Self {
        Self {
            nodes: NodePool::new(),
        }
    }

    /// Clear all sets in the forest.
    ///
    /// All `Set` instances belonging to this forest are invalidated and should no longer be used.
    pub fn clear(&mut self) {
        self.nodes.clear();
    }
}

/// A public type with an argument-less `new()` should also be constructible through `Default`,
/// so that it can be used with `#[derive(Default)]` containers and `Default::default()`.
impl<K, C> Default for SetForest<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Create a new empty forest; equivalent to `SetForest::new()`.
    fn default() -> Self {
        Self::new()
    }
}

/// B-tree representing an ordered set of `K`s using `C` for comparing elements.
///
/// This is not a general-purpose replacement for `BTreeSet`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Sets can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a set does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
pub struct Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    // Root node of the tree, or `None` when the set is empty.
    root: PackedOption<Node>,
    // Ties `K` and `C` to the type; no key or comparator value is stored in the set itself.
    unused: PhantomData<(K, C)>,
}

// `Clone` is written by hand instead of derived: `#[derive(Clone)]` would add an implicit
// `C: Clone` bound, making sets with a non-`Clone` comparator type impossible to clone even
// though the set never stores a `C` value.
impl<K, C> Clone for Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Create an alias of this set: the clone refers to the same root node and no nodes are
    /// copied.
    fn clone(&self) -> Self {
        Self {
            root: self.root,
            unused: PhantomData,
        }
    }
}

impl<K, C> Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Make an empty set.
    pub fn new() -> Self {
        Self {
            root: None.into(),
            unused: PhantomData,
        }
    }

    /// Is this an empty set?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Does the set contain `key`?.
    pub fn contains(&self, key: K, forest: &SetForest<K, C>, comp: &C) -> bool {
        self.root
            .expand()
            .and_then(|root| Path::default().find(key, root, &forest.nodes, comp))
            .is_some()
    }

    /// Try to insert `key` into the set.
    ///
    /// If the set did not contain `key`, insert it and return true.
    ///
    /// If `key` is already present, don't change the set and return false.
    pub fn insert(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
        self.cursor(forest, comp).insert(key)
    }

    /// Remove `key` from the set and return true.
    ///
    /// If `key` was not present in the set, return false.
    pub fn remove(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
        let mut c = self.cursor(forest, comp);
        if c.goto(key) {
            c.remove();
            true
        } else {
            false
        }
    }

    /// Remove all entries.
    pub fn clear(&mut self, forest: &mut SetForest<K, C>) {
        if let Some(root) = self.root.take() {
            forest.nodes.free_tree(root);
        }
    }

    /// Retains only the elements specified by the predicate.
    ///
    /// Remove all elements where the predicate returns false.
    pub fn retain<F>(&mut self, forest: &mut SetForest<K, C>, mut predicate: F)
    where
        F: FnMut(K) -> bool,
    {
        let mut path = Path::default();
        if let Some(root) = self.root.expand() {
            path.first(root, &forest.nodes);
        }
        while let Some((node, entry)) = path.leaf_pos() {
            if predicate(forest.nodes[node].unwrap_leaf().0[entry]) {
                path.next(&forest.nodes);
            } else {
                self.root = path.remove(&mut forest.nodes).into();
            }
        }
    }

    /// Create a cursor for navigating this set. The cursor is initially positioned off the end of
    /// the set.
    pub fn cursor<'a>(
        &'a mut self,
        forest: &'a mut SetForest<K, C>,
        comp: &'a C,
    ) -> SetCursor<'a, K, C> {
        SetCursor::new(self, forest, comp)
    }

    /// Create an iterator traversing this set. The iterator type is `K`.
    pub fn iter<'a>(&'a self, forest: &'a SetForest<K, C>) -> SetIter<'a, K, C> {
        SetIter {
            root: self.root,
            pool: &forest.nodes,
            path: Path::default(),
        }
    }
}

impl<K, C> Default for Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    fn default() -> Self {
        Self::new()
    }
}

/// A position in a `Set` used to navigate and modify the ordered set.
///
/// A cursor always points at an element in the set, or "off the end" which is a position after the
/// last element in the set.
///
/// The cursor mutably borrows both the set's root and the forest's node pool for its lifetime.
pub struct SetCursor<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // Root of the set being navigated; rewritten by the cursor's `insert` and `remove`.
    root: &'a mut PackedOption<Node>,
    // Node pool of the forest the set belongs to.
    pool: &'a mut NodePool<SetTypes<K, C>>,
    // Comparator used when searching for elements.
    comp: &'a C,
    // Current position of the cursor in the tree.
    path: Path<SetTypes<K, C>>,
}

impl<'a, K, C> SetCursor<'a, K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Build a cursor over `container` whose position is the default (invalid) path.
    fn new(
        container: &'a mut Set<K, C>,
        forest: &'a mut SetForest<K, C>,
        comp: &'a C,
    ) -> Self {
        let root = &mut container.root;
        let pool = &mut forest.nodes;
        SetCursor {
            root,
            pool,
            comp,
            path: Path::default(),
        }
    }

    /// Return true if the set this cursor navigates has no root node, i.e. is empty.
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Advance the cursor to the following element and return that element.
    ///
    /// When the end is reached, `None` is returned and the cursor stays at the off-the-end
    /// position.
    pub fn next(&mut self) -> Option<K> {
        self.path.next(self.pool).map(|entry| entry.0)
    }

    /// Step the cursor back to the preceding element and return that element.
    ///
    /// If the cursor already points at the first element, it stays there and `None` is returned.
    /// `None` is also returned for an empty set.
    pub fn prev(&mut self) -> Option<K> {
        match self.root.expand() {
            Some(root) => self.path.prev(root, self.pool).map(|entry| entry.0),
            None => None,
        }
    }

    /// Return the element under the cursor, or `None` if the cursor is at the end.
    pub fn elem(&self) -> Option<K> {
        match self.path.leaf_pos() {
            Some((node, entry)) => self.pool[node].unwrap_leaf().0.get(entry).cloned(),
            None => None,
        }
    }

    /// Position the cursor at `elem`.
    ///
    /// Returns true and points the cursor at `elem` when it is a member of the set.
    ///
    /// Otherwise returns false and leaves the cursor at the next larger element, or at the end
    /// when there is none.
    pub fn goto(&mut self, elem: K) -> bool {
        let root = match self.root.expand() {
            Some(root) => root,
            // Nothing to search in an empty set.
            None => return false,
        };
        let found = self.path.find(elem, root, self.pool, self.comp).is_some();
        if !found {
            self.path.normalize(self.pool);
        }
        found
    }

    /// Position the cursor at the first element and return it, or return `None` for an empty
    /// set.
    pub fn goto_first(&mut self) -> Option<K> {
        match self.root.expand() {
            Some(root) => Some(self.path.first(root, self.pool).0),
            None => None,
        }
    }

    /// Try to insert `elem` into the set, leaving the cursor at the inserted element.
    ///
    /// Returns true if `elem` was not a member and has been inserted.
    ///
    /// Returns false if `elem` was already present; the set is then unchanged and the cursor is
    /// placed as by `goto(elem)`.
    pub fn insert(&mut self, elem: K) -> bool {
        if let Some(root) = self.root.expand() {
            // TODO: Optimize the case where `self.path` is already at the correct insert pos.
            if self.path.find(elem, root, self.pool, self.comp).is_some() {
                return false;
            }
            *self.root = self.path.insert(elem, SetValue(), self.pool).into();
        } else {
            // First element of an empty set: a single leaf node becomes the root.
            let root = self.pool.alloc_node(NodeData::leaf(elem, SetValue()));
            *self.root = root.into();
            self.path.set_root_node(root);
        }
        true
    }

    /// Remove the element under the cursor, if there is one, and return it.
    /// Afterwards the cursor points at the element that followed the removed one.
    pub fn remove(&mut self) -> Option<K> {
        match self.elem() {
            Some(current) => {
                // The removal reports the (possibly changed) root, which must be recorded.
                *self.root = self.path.remove(self.pool).into();
                Some(current)
            }
            None => None,
        }
    }
}

#[cfg(test)]
impl<'a, K, C> SetCursor<'a, K, C>
where
    K: Copy + fmt::Display,
    C: Comparator<K>,
{
    /// Run the consistency checks of the current path and, for a non-empty set, of the tree.
    fn verify(&self) {
        self.path.verify(self.pool);
        if let Some(root) = self.root.expand() {
            self.pool.verify_tree(root, self.comp);
        }
    }

    /// Render the path to the current position as text.
    fn tpath(&self) -> String {
        use std::string::ToString;
        ToString::to_string(&self.path)
    }
}

/// An iterator visiting the elements of a `Set`.
///
/// Created by `Set::iter`; yields values of type `K`.
pub struct SetIter<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // Root of the set; holds a node only until the first call to `next()`, which takes it.
    root: PackedOption<Node>,
    // Node pool of the forest the set belongs to.
    pool: &'a NodePool<SetTypes<K, C>>,
    // Current position of the iterator in the tree.
    path: Path<SetTypes<K, C>>,
}

impl<'a, K, C> Iterator for SetIter<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    type Item = K;

    /// Return the next element of the set, or `None` once all elements have been visited.
    fn next(&mut self) -> Option<Self::Item> {
        // `self.root` doubles as the "not started yet" flag: it is taken on the first call, which
        // positions the path at the first element. All later calls just advance the path. This
        // also works for an empty tree since the `path.next()` call returns `None` when the path
        // is empty. This also fuses the iterator.
        if let Some(root) = self.root.take() {
            let first = self.path.first(root, self.pool).0;
            Some(first)
        } else {
            self.path.next(self.pool).map(|entry| entry.0)
        }
    }
}

#[cfg(test)]
mod test {
    use super::super::NodeData;
    use super::*;
    use std::mem;
    use std::vec::Vec;

    // The node layout is expected to be exactly 64 bytes for 32-bit keys.
    #[test]
    fn node_size() {
        // check that nodes are cache line sized when keys are 32 bits.
        type F = SetTypes<u32, ()>;
        assert_eq!(mem::size_of::<NodeData<F>>(), 64);
    }

    // Every operation must be well-behaved on an empty forest and an empty set.
    #[test]
    fn empty() {
        let mut f = SetForest::<u32, ()>::new();
        f.clear();

        let mut s = Set::<u32, ()>::new();
        assert!(s.is_empty());
        s.clear(&mut f);
        assert!(!s.contains(7, &f, &()));

        // Iterator for an empty set.
        assert_eq!(s.iter(&f).next(), None);

        // The predicate must never be called when there are no elements.
        s.retain(&mut f, |_| unreachable!());

        let mut c = SetCursor::new(&mut s, &mut f, &());
        c.verify();
        assert_eq!(c.elem(), None);

        assert_eq!(c.goto_first(), None);
        assert_eq!(c.tpath(), "<empty path>");
    }

    // Insertion, movement in both directions, and removal on a three-element set.
    #[test]
    fn simple_cursor() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        // After each insertion the cursor points at the inserted element.
        assert!(c.insert(50));
        c.verify();
        assert_eq!(c.elem(), Some(50));

        assert!(c.insert(100));
        c.verify();
        assert_eq!(c.elem(), Some(100));

        assert!(c.insert(10));
        c.verify();
        assert_eq!(c.elem(), Some(10));

        // Basic movement.
        assert_eq!(c.next(), Some(50));
        assert_eq!(c.next(), Some(100));
        assert_eq!(c.next(), None);
        assert_eq!(c.next(), None);
        assert_eq!(c.prev(), Some(100));
        assert_eq!(c.prev(), Some(50));
        assert_eq!(c.prev(), Some(10));
        assert_eq!(c.prev(), None);
        assert_eq!(c.prev(), None);

        // Removing an element leaves the cursor at the following one.
        assert!(c.goto(50));
        assert_eq!(c.elem(), Some(50));
        assert_eq!(c.remove(), Some(50));
        c.verify();

        assert_eq!(c.elem(), Some(100));
        assert_eq!(c.remove(), Some(100));
        c.verify();
        assert_eq!(c.elem(), None);
        assert_eq!(c.remove(), None);
        c.verify();
    }

    // Sequential insertion of 50 elements, then traversal and removal of all of them.
    #[test]
    fn two_level_sparse_tree() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        // Insert enough elements that we get a two-level tree.
        // Each leaf node holds 8 elements
        assert!(c.is_empty());
        for i in 0..50 {
            assert!(c.insert(i));
            assert_eq!(c.elem(), Some(i));
        }
        assert!(!c.is_empty());

        assert_eq!(c.goto_first(), Some(0));
        assert_eq!(c.tpath(), "node2[0]--node0[0]");

        // Walk the whole set forwards and then backwards.
        assert_eq!(c.prev(), None);
        for i in 1..50 {
            assert_eq!(c.next(), Some(i));
        }
        assert_eq!(c.next(), None);
        for i in (0..50).rev() {
            assert_eq!(c.prev(), Some(i));
        }
        assert_eq!(c.prev(), None);

        // Remove the upper half front to back; the cursor ends up off the end.
        assert!(c.goto(25));
        for i in 25..50 {
            assert_eq!(c.remove(), Some(i));
            assert!(!c.is_empty());
            c.verify();
        }

        // Remove the rest back to front, always deleting the current last element.
        for i in (0..25).rev() {
            assert!(!c.is_empty());
            assert_eq!(c.elem(), None);
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            c.verify();
        }
        assert_eq!(c.elem(), None);
        assert!(c.is_empty());
    }

    // Same scenario as `two_level_sparse_tree`, with 150 elements.
    #[test]
    fn three_level_sparse_tree() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        // Insert enough elements that we get a 3-level tree.
        // Each leaf node holds 8 elements when filled up sequentially.
        // Inner nodes hold 8 node pointers.
        assert!(c.is_empty());
        for i in 0..150 {
            assert!(c.insert(i));
            assert_eq!(c.elem(), Some(i));
        }
        assert!(!c.is_empty());

        assert!(c.goto(0));
        assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]");

        // Walk the whole set forwards and then backwards.
        assert_eq!(c.prev(), None);
        for i in 1..150 {
            assert_eq!(c.next(), Some(i));
        }
        assert_eq!(c.next(), None);
        for i in (0..150).rev() {
            assert_eq!(c.prev(), Some(i));
        }
        assert_eq!(c.prev(), None);

        // Remove the tail front to back; the cursor ends up off the end.
        assert!(c.goto(125));
        for i in 125..150 {
            assert_eq!(c.remove(), Some(i));
            assert!(!c.is_empty());
            c.verify();
        }

        // Remove the rest back to front, always deleting the current last element.
        for i in (0..125).rev() {
            assert!(!c.is_empty());
            assert_eq!(c.elem(), None);
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            c.verify();
        }
        assert_eq!(c.elem(), None);
        assert!(c.is_empty());
    }

    // Generate a densely populated 4-level tree.
    //
    // Level 1: 1 root
    // Level 2: 8 inner
    // Level 3: 64 inner
    // Level 4: 512 leafs, up to 7680 elements
    //
    // A 3-level tree can hold at most 960 elements.
    fn dense4l(f: &mut SetForest<i32, ()>) -> Set<i32, ()> {
        f.clear();
        let mut s = Set::new();

        // Insert 4000 elements in 7 passes over the range to avoid the half-full leaf node pattern
        // that comes from sequential insertion. This will generate a normal leaf layer.
        for n in 0..4000 {
            assert!(s.insert((n * 7) % 4000, f, &()));
        }
        s
    }

    // Iteration and several removal patterns on the dense tree built by `dense4l`.
    #[test]
    fn four_level() {
        let mut f = SetForest::<i32, ()>::new();
        let mut s = dense4l(&mut f);

        // Iteration yields the elements in order regardless of the insertion order.
        assert_eq!(
            s.iter(&f).collect::<Vec<_>>()[0..10],
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        );

        let mut c = s.cursor(&mut f, &());

        c.verify();

        // Peel off a whole sub-tree of the root by deleting from the front.
        // The 900 element is near the front of the second sub-tree.
        assert!(c.goto(900));
        assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]");
        assert!(c.goto(0));
        for i in 0..900 {
            assert!(!c.is_empty());
            assert_eq!(c.remove(), Some(i));
        }
        c.verify();
        assert_eq!(c.elem(), Some(900));

        // Delete backwards from somewhere in the middle.
        assert!(c.goto(3000));
        for i in (2000..3000).rev() {
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            assert_eq!(c.elem(), Some(3000));
        }
        c.verify();

        // Remove everything in a scattered manner, triggering many collapsing patterns.
        for i in 0..4000 {
            if c.goto((i * 7) % 4000) {
                c.remove();
            }
        }
        assert!(c.is_empty());
    }

    // Clearing a dense set must complete without panicking.
    #[test]
    fn four_level_clear() {
        let mut f = SetForest::<i32, ()>::new();
        let mut s = dense4l(&mut f);
        s.clear(&mut f);
    }
}