Rename the 'cretonne' crate to 'cretonne-codegen'.

This fixes the next part of #287.
This commit is contained in:
Dan Gohman
2018-04-17 08:48:02 -07:00
parent 7767186dd0
commit 24fa169e1f
254 changed files with 265 additions and 264 deletions

View File

@@ -0,0 +1,918 @@
//! Forest of maps.
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, INNER_SIZE};
use packed_option::PackedOption;
use std::marker::PhantomData;
/// Tag type defining forest types for a map.
struct MapTypes<K, V, C>(PhantomData<(K, V, C)>);
impl<K, V, C> Forest for MapTypes<K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
type Key = K;
type Value = V;
type LeafKeys = [K; INNER_SIZE - 1];
type LeafValues = [V; INNER_SIZE - 1];
type Comparator = C;
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; INNER_SIZE - 1]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; INNER_SIZE - 1]
}
}
/// Memory pool for a forest of `Map` instances.
pub struct MapForest<K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
nodes: NodePool<MapTypes<K, V, C>>,
}
impl<K, V, C> MapForest<K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
/// Create a new empty forest.
pub fn new() -> MapForest<K, V, C> {
MapForest { nodes: NodePool::new() }
}
/// Clear all maps in the forest.
///
/// All `Map` instances belong to this forest are invalidated and should no longer be used.
pub fn clear(&mut self) {
self.nodes.clear();
}
}
/// B-tree mapping from `K` to `V` using `C` for comparing keys.
///
/// This is not a general-purpose replacement for `BTreeMap`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Maps can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a map does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Map<K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
root: PackedOption<Node>,
unused: PhantomData<(K, V, C)>,
}
impl<K, V, C> Map<K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
/// Make an empty map.
pub fn new() -> Map<K, V, C> {
Map {
root: None.into(),
unused: PhantomData,
}
}
/// Is this an empty map?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Get the value stored for `key`.
pub fn get(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> Option<V> {
self.root.expand().and_then(|root| {
Path::default().find(key, root, &forest.nodes, comp)
})
}
/// Look up the value stored for `key`.
///
/// If it exists, return the stored key-value pair.
///
/// Otherwise, return the last key-value pair with a key that is less than or equal to `key`.
///
/// If no stored keys are less than or equal to `key`, return `None`.
pub fn get_or_less(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> Option<(K, V)> {
self.root.expand().and_then(|root| {
let mut path = Path::default();
match path.find(key, root, &forest.nodes, comp) {
Some(v) => Some((key, v)),
None => path.prev(root, &forest.nodes),
}
})
}
/// Insert `key, value` into the map and return the old value stored for `key`, if any.
pub fn insert(
&mut self,
key: K,
value: V,
forest: &mut MapForest<K, V, C>,
comp: &C,
) -> Option<V> {
self.cursor(forest, comp).insert(key, value)
}
/// Remove `key` from the map and return the removed value for `key`, if any.
pub fn remove(&mut self, key: K, forest: &mut MapForest<K, V, C>, comp: &C) -> Option<V> {
let mut c = self.cursor(forest, comp);
if c.goto(key).is_some() {
c.remove()
} else {
None
}
}
/// Remove all entries.
pub fn clear(&mut self, forest: &mut MapForest<K, V, C>) {
if let Some(root) = self.root.take() {
forest.nodes.free_tree(root);
}
}
/// Retains only the elements specified by the predicate.
///
/// Remove all key-value pairs where the predicate returns false.
///
/// The predicate is allowed to update the values stored in the map.
pub fn retain<F>(&mut self, forest: &mut MapForest<K, V, C>, mut predicate: F)
where
F: FnMut(K, &mut V) -> bool,
{
let mut path = Path::default();
if let Some(root) = self.root.expand() {
path.first(root, &forest.nodes);
}
while let Some((node, entry)) = path.leaf_pos() {
let keep = {
let (ks, vs) = forest.nodes[node].unwrap_leaf_mut();
predicate(ks[entry], &mut vs[entry])
};
if keep {
path.next(&forest.nodes);
} else {
self.root = path.remove(&mut forest.nodes).into();
}
}
}
/// Create a cursor for navigating this map. The cursor is initially positioned off the end of
/// the map.
pub fn cursor<'a>(
&'a mut self,
forest: &'a mut MapForest<K, V, C>,
comp: &'a C,
) -> MapCursor<'a, K, V, C> {
MapCursor::new(self, forest, comp)
}
/// Create an iterator traversing this map. The iterator type is `(K, V)`.
pub fn iter<'a>(&'a self, forest: &'a MapForest<K, V, C>) -> MapIter<'a, K, V, C> {
MapIter {
root: self.root,
pool: &forest.nodes,
path: Path::default(),
}
}
}
impl<K, V, C> Default for Map<K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
impl<K, V, C> Map<K, V, C>
where
K: Copy + ::std::fmt::Display,
V: Copy,
C: Comparator<K>,
{
/// Verify consistency.
fn verify(&self, forest: &MapForest<K, V, C>, comp: &C)
where
NodeData<MapTypes<K, V, C>>: ::std::fmt::Display,
{
if let Some(root) = self.root.expand() {
forest.nodes.verify_tree(root, comp);
}
}
/// Get a text version of the path to `key`.
fn tpath(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> String {
match self.root.expand() {
None => "map(empty)".to_string(),
Some(root) => {
let mut path = Path::default();
path.find(key, root, &forest.nodes, comp);
path.to_string()
}
}
}
}
/// A position in a `Map` used to navigate and modify the ordered map.
///
/// A cursor always points at a key-value pair in the map, or "off the end" which is a position
/// after the last entry in the map.
pub struct MapCursor<'a, K, V, C>
where
K: 'a + Copy,
V: 'a + Copy,
C: 'a + Comparator<K>,
{
root: &'a mut PackedOption<Node>,
pool: &'a mut NodePool<MapTypes<K, V, C>>,
comp: &'a C,
path: Path<MapTypes<K, V, C>>,
}
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
/// Create a cursor with a default (off-the-end) location.
fn new(
container: &'a mut Map<K, V, C>,
forest: &'a mut MapForest<K, V, C>,
comp: &'a C,
) -> MapCursor<'a, K, V, C> {
MapCursor {
root: &mut container.root,
pool: &mut forest.nodes,
comp,
path: Path::default(),
}
}
/// Is this cursor pointing to an empty map?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Move cursor to the next key-value pair and return it.
///
/// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
/// position.
pub fn next(&mut self) -> Option<(K, V)> {
self.path.next(self.pool)
}
/// Move cursor to the previous key-value pair and return it.
///
/// If the cursor is already pointing at the first entry, leave it there and return `None`.
pub fn prev(&mut self) -> Option<(K, V)> {
self.root.expand().and_then(
|root| self.path.prev(root, self.pool),
)
}
/// Get the current key, or `None` if the cursor is at the end.
pub fn key(&self) -> Option<K> {
self.path.leaf_pos().and_then(|(node, entry)| {
self.pool[node].unwrap_leaf().0.get(entry).cloned()
})
}
/// Get the current value, or `None` if the cursor is at the end.
pub fn value(&self) -> Option<V> {
self.path.leaf_pos().and_then(|(node, entry)| {
self.pool[node].unwrap_leaf().1.get(entry).cloned()
})
}
/// Get a mutable reference to the current value, or `None` if the cursor is at the end.
pub fn value_mut(&mut self) -> Option<&mut V> {
self.path.leaf_pos().and_then(move |(node, entry)| {
self.pool[node].unwrap_leaf_mut().1.get_mut(entry)
})
}
/// Move this cursor to `key`.
///
/// If `key` is in the map, place the cursor at `key` and return the corresponding value.
///
/// If `key` is not in the set, place the cursor at the next larger element (or the end) and
/// return `None`.
pub fn goto(&mut self, elem: K) -> Option<V> {
self.root.expand().and_then(|root| {
let v = self.path.find(elem, root, self.pool, self.comp);
if v.is_none() {
self.path.normalize(self.pool);
}
v
})
}
/// Move this cursor to the first element.
pub fn goto_first(&mut self) -> Option<V> {
self.root.map(|root| self.path.first(root, self.pool).1)
}
/// Insert `(key, value))` into the map and leave the cursor at the inserted pair.
///
/// If the map did not contain `key`, return `None`.
///
/// If `key` is already present, replace the existing with `value` and return the old value.
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
match self.root.expand() {
None => {
let root = self.pool.alloc_node(NodeData::leaf(key, value));
*self.root = root.into();
self.path.set_root_node(root);
None
}
Some(root) => {
// TODO: Optimize the case where `self.path` is already at the correct insert pos.
let old = self.path.find(key, root, self.pool, self.comp);
if old.is_some() {
*self.path.value_mut(self.pool) = value;
} else {
*self.root = self.path.insert(key, value, self.pool).into();
}
old
}
}
}
/// Remove the current entry (if any) and return the mapped value.
/// This advances the cursor to the next entry after the removed one.
pub fn remove(&mut self) -> Option<V> {
let value = self.value();
if value.is_some() {
*self.root = self.path.remove(self.pool).into();
}
value
}
}
/// An iterator visiting the key-value pairs of a `Map`.
pub struct MapIter<'a, K, V, C>
where
K: 'a + Copy,
V: 'a + Copy,
C: 'a + Comparator<K>,
{
root: PackedOption<Node>,
pool: &'a NodePool<MapTypes<K, V, C>>,
path: Path<MapTypes<K, V, C>>,
}
impl<'a, K, V, C> Iterator for MapIter<'a, K, V, C>
where
K: 'a + Copy,
V: 'a + Copy,
C: 'a + Comparator<K>,
{
type Item = (K, V);
fn next(&mut self) -> Option<Self::Item> {
// We use `self.root` to indicate if we need to go to the first element. Reset to `None`
// once we've returned the first element. This also works for an empty tree since the
// `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
match self.root.take() {
Some(root) => Some(self.path.first(root, self.pool)),
None => self.path.next(self.pool),
}
}
}
#[cfg(test)]
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
K: Copy + ::std::fmt::Display,
V: Copy + ::std::fmt::Display,
C: Comparator<K>,
{
fn verify(&self) {
self.path.verify(self.pool);
self.root.map(|root| self.pool.verify_tree(root, self.comp));
}
/// Get a text version of the path to the current position.
fn tpath(&self) -> String {
self.path.to_string()
}
}
#[cfg(test)]
mod test {
use super::super::NodeData;
use super::*;
use std::mem;
use std::vec::Vec;
#[test]
fn node_size() {
// check that nodes are cache line sized when keys and values are 32 bits.
type F = MapTypes<u32, u32, ()>;
assert_eq!(mem::size_of::<NodeData<F>>(), 64);
}
#[test]
fn empty() {
let mut f = MapForest::<u32, f32, ()>::new();
f.clear();
let mut m = Map::<u32, f32, ()>::new();
assert!(m.is_empty());
m.clear(&mut f);
assert_eq!(m.get(7, &f, &()), None);
assert_eq!(m.iter(&f).next(), None);
assert_eq!(m.get_or_less(7, &f, &()), None);
m.retain(&mut f, |_, _| unreachable!());
let mut c = m.cursor(&mut f, &());
assert!(c.is_empty());
assert_eq!(c.key(), None);
assert_eq!(c.value(), None);
assert_eq!(c.next(), None);
assert_eq!(c.prev(), None);
c.verify();
assert_eq!(c.tpath(), "<empty path>");
assert_eq!(c.goto_first(), None);
assert_eq!(c.tpath(), "<empty path>");
}
#[test]
fn inserting() {
let f = &mut MapForest::<u32, f32, ()>::new();
let mut m = Map::<u32, f32, ()>::new();
// The first seven values stay in a single leaf node.
assert_eq!(m.insert(50, 5.0, f, &()), None);
assert_eq!(m.insert(50, 5.5, f, &()), Some(5.0));
assert_eq!(m.insert(20, 2.0, f, &()), None);
assert_eq!(m.insert(80, 8.0, f, &()), None);
assert_eq!(m.insert(40, 4.0, f, &()), None);
assert_eq!(m.insert(60, 6.0, f, &()), None);
assert_eq!(m.insert(90, 9.0, f, &()), None);
assert_eq!(m.insert(200, 20.0, f, &()), None);
m.verify(f, &());
assert_eq!(
m.iter(f).collect::<Vec<_>>(),
[
(20, 2.0),
(40, 4.0),
(50, 5.5),
(60, 6.0),
(80, 8.0),
(90, 9.0),
(200, 20.0),
]
);
assert_eq!(m.get(0, f, &()), None);
assert_eq!(m.get(20, f, &()), Some(2.0));
assert_eq!(m.get(30, f, &()), None);
assert_eq!(m.get(40, f, &()), Some(4.0));
assert_eq!(m.get(50, f, &()), Some(5.5));
assert_eq!(m.get(60, f, &()), Some(6.0));
assert_eq!(m.get(70, f, &()), None);
assert_eq!(m.get(80, f, &()), Some(8.0));
assert_eq!(m.get(100, f, &()), None);
assert_eq!(m.get_or_less(0, f, &()), None);
assert_eq!(m.get_or_less(20, f, &()), Some((20, 2.0)));
assert_eq!(m.get_or_less(30, f, &()), Some((20, 2.0)));
assert_eq!(m.get_or_less(40, f, &()), Some((40, 4.0)));
assert_eq!(m.get_or_less(200, f, &()), Some((200, 20.0)));
assert_eq!(m.get_or_less(201, f, &()), Some((200, 20.0)));
{
let mut c = m.cursor(f, &());
assert_eq!(c.prev(), Some((200, 20.0)));
assert_eq!(c.prev(), Some((90, 9.0)));
assert_eq!(c.prev(), Some((80, 8.0)));
assert_eq!(c.prev(), Some((60, 6.0)));
assert_eq!(c.prev(), Some((50, 5.5)));
assert_eq!(c.prev(), Some((40, 4.0)));
assert_eq!(c.prev(), Some((20, 2.0)));
assert_eq!(c.prev(), None);
}
// Test some removals where the node stays healthy.
assert_eq!(m.tpath(50, f, &()), "node0[2]");
assert_eq!(m.tpath(80, f, &()), "node0[4]");
assert_eq!(m.tpath(200, f, &()), "node0[6]");
assert_eq!(m.remove(80, f, &()), Some(8.0));
assert_eq!(m.tpath(50, f, &()), "node0[2]");
assert_eq!(m.tpath(80, f, &()), "node0[4]");
assert_eq!(m.tpath(200, f, &()), "node0[5]");
assert_eq!(m.remove(80, f, &()), None);
m.verify(f, &());
assert_eq!(m.remove(20, f, &()), Some(2.0));
assert_eq!(m.tpath(50, f, &()), "node0[1]");
assert_eq!(m.tpath(80, f, &()), "node0[3]");
assert_eq!(m.tpath(200, f, &()), "node0[4]");
assert_eq!(m.remove(20, f, &()), None);
m.verify(f, &());
// [ 40 50 60 90 200 ]
{
let mut c = m.cursor(f, &());
assert_eq!(c.goto_first(), Some(4.0));
assert_eq!(c.key(), Some(40));
assert_eq!(c.value(), Some(4.0));
assert_eq!(c.next(), Some((50, 5.5)));
assert_eq!(c.next(), Some((60, 6.0)));
assert_eq!(c.next(), Some((90, 9.0)));
assert_eq!(c.next(), Some((200, 20.0)));
c.verify();
assert_eq!(c.next(), None);
c.verify();
}
// Removals from the root leaf node beyond underflow.
assert_eq!(m.remove(200, f, &()), Some(20.0));
assert_eq!(m.remove(40, f, &()), Some(4.0));
assert_eq!(m.remove(60, f, &()), Some(6.0));
m.verify(f, &());
assert_eq!(m.remove(50, f, &()), Some(5.5));
m.verify(f, &());
assert_eq!(m.remove(90, f, &()), Some(9.0));
m.verify(f, &());
assert!(m.is_empty());
}
#[test]
fn split_level0_leaf() {
// Various ways of splitting a full leaf node at level 0.
let f = &mut MapForest::<u32, f32, ()>::new();
fn full_leaf(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
let mut m = Map::new();
for n in 1..8 {
m.insert(n * 10, n as f32 * 1.1, f, &());
}
m
}
// Insert at front of leaf.
let mut m = full_leaf(f);
m.insert(5, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(5, f, &()), Some(4.2));
// Retain even entries, with altered values.
m.retain(f, |k, v| {
*v = (k / 10) as f32;
(k % 20) == 0
});
assert_eq!(
m.iter(f).collect::<Vec<_>>(),
[(20, 2.0), (40, 4.0), (60, 6.0)]
);
// Insert at back of leaf.
let mut m = full_leaf(f);
m.insert(80, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(80, f, &()), Some(4.2));
// Insert before middle (40).
let mut m = full_leaf(f);
m.insert(35, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(35, f, &()), Some(4.2));
// Insert after middle (40).
let mut m = full_leaf(f);
m.insert(45, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(45, f, &()), Some(4.2));
m.clear(f);
assert!(m.is_empty());
}
#[test]
fn split_level1_leaf() {
// Various ways of splitting a full leaf node at level 1.
let f = &mut MapForest::<u32, f32, ()>::new();
// Return a map whose root node is a full inner node, and the leaf nodes are all full
// containing:
//
// 110, 120, ..., 170
// 210, 220, ..., 270
// ...
// 810, 820, ..., 870
fn full(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
let mut m = Map::new();
// Start by inserting elements in order.
// This should leave 8 leaf nodes with 4 elements in each.
for row in 1..9 {
for col in 1..5 {
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
}
}
// Then top up the leaf nodes without splitting them.
for row in 1..9 {
for col in 5..8 {
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
}
}
m
}
let mut m = full(f);
// Verify geometry. Get get node2 as the root and leaves node0, 1, 3, ...
m.verify(f, &());
assert_eq!(m.tpath(110, f, &()), "node2[0]--node0[0]");
assert_eq!(m.tpath(140, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(210, f, &()), "node2[1]--node1[0]");
assert_eq!(m.tpath(270, f, &()), "node2[1]--node1[6]");
assert_eq!(m.tpath(310, f, &()), "node2[2]--node3[0]");
assert_eq!(m.tpath(810, f, &()), "node2[7]--node8[0]");
assert_eq!(m.tpath(870, f, &()), "node2[7]--node8[6]");
{
let mut c = m.cursor(f, &());
assert_eq!(c.goto_first(), Some(1.1));
assert_eq!(c.key(), Some(110));
}
// Front of first leaf.
m.insert(0, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(0, f, &()), Some(4.2));
// First leaf split 4-4 after appending to LHS.
f.clear();
m = full(f);
m.insert(135, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(135, f, &()), Some(4.2));
// First leaf split 4-4 after prepending to RHS.
f.clear();
m = full(f);
m.insert(145, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(145, f, &()), Some(4.2));
// First leaf split 4-4 after appending to RHS.
f.clear();
m = full(f);
m.insert(175, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(175, f, &()), Some(4.2));
// Left-middle leaf split, ins LHS.
f.clear();
m = full(f);
m.insert(435, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(435, f, &()), Some(4.2));
// Left-middle leaf split, ins RHS.
f.clear();
m = full(f);
m.insert(445, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(445, f, &()), Some(4.2));
// Right-middle leaf split, ins LHS.
f.clear();
m = full(f);
m.insert(535, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(535, f, &()), Some(4.2));
// Right-middle leaf split, ins RHS.
f.clear();
m = full(f);
m.insert(545, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(545, f, &()), Some(4.2));
// Last leaf split, ins LHS.
f.clear();
m = full(f);
m.insert(835, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(835, f, &()), Some(4.2));
// Last leaf split, ins RHS.
f.clear();
m = full(f);
m.insert(845, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(845, f, &()), Some(4.2));
// Front of last leaf.
f.clear();
m = full(f);
m.insert(805, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(805, f, &()), Some(4.2));
m.clear(f);
m.verify(f, &());
}
// Make a tree with two barely healthy leaf nodes:
// [ 10 20 30 40 ] [ 50 60 70 80 ]
fn two_leaf(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
f.clear();
let mut m = Map::new();
for n in 1..9 {
m.insert(n * 10, n as f32, f, &());
}
m
}
#[test]
fn remove_level1() {
let f = &mut MapForest::<u32, f32, ()>::new();
let mut m = two_leaf(f);
// Verify geometry.
m.verify(f, &());
assert_eq!(m.tpath(10, f, &()), "node2[0]--node0[0]");
assert_eq!(m.tpath(40, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(50, f, &()), "node2[1]--node1[0]");
assert_eq!(m.tpath(80, f, &()), "node2[1]--node1[3]");
// Remove the front entry from a node that stays healthy.
assert_eq!(m.insert(55, 5.5, f, &()), None);
assert_eq!(m.remove(50, f, &()), Some(5.0));
m.verify(f, &());
assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(55, f, &()), "node2[1]--node1[0]");
// Remove the front entry from the first leaf node: No critical key to update.
assert_eq!(m.insert(15, 1.5, f, &()), None);
assert_eq!(m.remove(10, f, &()), Some(1.0));
m.verify(f, &());
// [ 15 20 30 40 ] [ 55 60 70 80 ]
// Remove the front entry from a right-most node that underflows.
// No rebalancing for the right-most node. Still need critical key update.
assert_eq!(m.remove(55, f, &()), Some(5.5));
m.verify(f, &());
assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");
// [ 15 20 30 40 ] [ 60 70 80 ]
// Replenish the right leaf.
assert_eq!(m.insert(90, 9.0, f, &()), None);
assert_eq!(m.insert(100, 10.0, f, &()), None);
m.verify(f, &());
assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");
// [ 15 20 30 40 ] [ 60 70 80 90 100 ]
// Removing one entry from the left leaf should trigger a rebalancing from the right
// sibling.
assert_eq!(m.remove(20, f, &()), Some(2.0));
m.verify(f, &());
// [ 15 30 40 60 ] [ 70 80 90 100 ]
// Check that the critical key was updated correctly.
assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(60, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");
// Remove front entry from the left-most leaf node, underflowing.
// This should cause two leaf nodes to be merged and the root node to go away.
assert_eq!(m.remove(15, f, &()), Some(1.5));
m.verify(f, &());
}
#[test]
fn remove_level1_rightmost() {
let f = &mut MapForest::<u32, f32, ()>::new();
let mut m = two_leaf(f);
// [ 10 20 30 40 ] [ 50 60 70 80 ]
// Remove entries from the right leaf. This doesn't trigger a rebalancing.
assert_eq!(m.remove(60, f, &()), Some(6.0));
assert_eq!(m.remove(80, f, &()), Some(8.0));
assert_eq!(m.remove(50, f, &()), Some(5.0));
m.verify(f, &());
// [ 10 20 30 40 ] [ 70 ]
assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");
// Removing the last entry from the right leaf should cause a collapse.
assert_eq!(m.remove(70, f, &()), Some(7.0));
m.verify(f, &());
}
// Make a 3-level tree with barely healthy nodes.
// 1 root, 8 inner nodes, 7*4+5=33 leaf nodes, 4 entries each.
fn level3_sparse(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
f.clear();
let mut m = Map::new();
for n in 1..133 {
m.insert(n * 10, n as f32, f, &());
}
m
}
#[test]
fn level3_removes() {
let f = &mut MapForest::<u32, f32, ()>::new();
let mut m = level3_sparse(f);
m.verify(f, &());
// Check geometry.
// Root: node11
// [ node2 170 node10 330 node16 490 node21 650 node26 810 node31 970 node36 1130 node41 ]
// L1: node11
assert_eq!(m.tpath(0, f, &()), "node11[0]--node2[0]--node0[0]");
assert_eq!(m.tpath(10000, f, &()), "node11[7]--node41[4]--node40[4]");
// 650 is a critical key in the middle of the root.
assert_eq!(m.tpath(640, f, &()), "node11[3]--node21[3]--node19[3]");
assert_eq!(m.tpath(650, f, &()), "node11[4]--node26[0]--node20[0]");
// Deleting 640 triggers a rebalance from node19 to node 20, cascading to n21 -> n26.
assert_eq!(m.remove(640, f, &()), Some(64.0));
m.verify(f, &());
assert_eq!(m.tpath(650, f, &()), "node11[3]--node26[3]--node20[3]");
// 1130 is in the first leaf of the last L1 node. Deleting it triggers a rebalance node35
// -> node37, but no rebalance above where there is no right sibling.
assert_eq!(m.tpath(1130, f, &()), "node11[6]--node41[0]--node35[0]");
assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node35[1]");
assert_eq!(m.remove(1130, f, &()), Some(113.0));
m.verify(f, &());
assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node37[0]");
}
#[test]
fn insert_many() {
let f = &mut MapForest::<u32, f32, ()>::new();
let mut m = Map::<u32, f32, ()>::new();
let mm = 4096;
let mut x = 0;
for n in 0..mm {
assert_eq!(m.insert(x, n as f32, f, &()), None);
m.verify(f, &());
x = (x + n + 1) % mm;
}
x = 0;
for n in 0..mm {
assert_eq!(m.get(x, f, &()), Some(n as f32));
x = (x + n + 1) % mm;
}
x = 0;
for n in 0..mm {
assert_eq!(m.remove(x, f, &()), Some(n as f32));
m.verify(f, &());
x = (x + n + 1) % mm;
}
assert!(m.is_empty());
}
}

View File

@@ -0,0 +1,172 @@
//! A forest of B+-trees.
//!
//! This module provides a data structures representing a set of small ordered sets or maps.
//! It is implemented as a forest of B+-trees all allocating nodes out of the same pool.
//!
//! **These are not general purpose data structures that are somehow magically faster that the
//! standard library's `BTreeSet` and `BTreeMap` types.**
//!
//! The tradeoffs are different:
//!
//! - Keys and values are expected to be small and copyable. We optimize for 32-bit types.
//! - A comparator object is used to compare keys, allowing smaller "context free" keys.
//! - Empty trees have a very small 32-bit footprint.
//! - All the trees in a forest can be cleared in constant time.
use std::borrow::BorrowMut;
use std::cmp::Ordering;
mod map;
mod node;
mod path;
mod pool;
mod set;
pub use self::map::{Map, MapCursor, MapForest, MapIter};
pub use self::set::{Set, SetCursor, SetForest, SetIter};
use self::node::NodeData;
use self::path::Path;
use self::pool::NodePool;
/// The maximum branching factor of an inner node in a B+-tree.
/// The minimum number of outgoing edges is `INNER_SIZE/2`.
const INNER_SIZE: usize = 8;
/// Given the worst case branching factor of `INNER_SIZE/2` = 4, this is the
/// worst case path length from the root node to a leaf node in a tree with 2^32
/// entries. We would run out of node references before we hit `MAX_PATH`.
const MAX_PATH: usize = 16;
/// Key comparator.
///
/// Keys don't need to implement `Ord`. They are compared using a comparator object which
/// provides a context for comparison.
pub trait Comparator<K>
where
K: Copy,
{
/// Compare keys `a` and `b`.
///
/// This relation must provide a total ordering or the key space.
fn cmp(&self, a: K, b: K) -> Ordering;
/// Binary search for `k` in an ordered slice.
///
/// Assume that `s` is already sorted according to this ordering, search for the key `k`.
///
/// Returns `Ok(idx)` if `k` was found in the slice or `Err(idx)` with the position where it
/// should be inserted to preserve the ordering.
fn search(&self, k: K, s: &[K]) -> Result<usize, usize> {
s.binary_search_by(|x| self.cmp(*x, k))
}
}
/// Trivial comparator that doesn't actually provide any context.
impl<K> Comparator<K> for ()
where
K: Copy + Ord,
{
fn cmp(&self, a: K, b: K) -> Ordering {
a.cmp(&b)
}
}
/// Family of types shared by the map and set forest implementations.
trait Forest {
/// The key type is present for both sets and maps.
type Key: Copy;
/// The value type is `()` for sets.
type Value: Copy;
/// An array of keys for the leaf nodes.
type LeafKeys: Copy + BorrowMut<[Self::Key]>;
/// An array of values for the leaf nodes.
type LeafValues: Copy + BorrowMut<[Self::Value]>;
/// Type used for key comparisons.
type Comparator: Comparator<Self::Key>;
/// Splat a single key into a whole array.
fn splat_key(key: Self::Key) -> Self::LeafKeys;
/// Splat a single value inst a whole array
fn splat_value(value: Self::Value) -> Self::LeafValues;
}
/// A reference to a B+-tree node.
#[derive(Clone, Copy, PartialEq, Eq)]
struct Node(u32);
entity_impl!(Node, "node");
/// Empty type to be used as the "value" in B-trees representing sets.
#[derive(Clone, Copy)]
struct SetValue();
/// Insert `x` into `s` at position `i`, pushing out the last element.
fn slice_insert<T: Copy>(s: &mut [T], i: usize, x: T) {
for j in (i + 1..s.len()).rev() {
s[j] = s[j - 1];
}
s[i] = x;
}
/// Shift elements in `s` to the left by `n` positions.
fn slice_shift<T: Copy>(s: &mut [T], n: usize) {
for j in 0..s.len() - n {
s[j] = s[j + n];
}
}
#[cfg(test)]
mod test {
use super::*;
use entity::EntityRef;
use ir::Ebb;
#[test]
fn comparator() {
let ebb1 = Ebb::new(1);
let ebb2 = Ebb::new(2);
let ebb3 = Ebb::new(3);
let ebb4 = Ebb::new(4);
let vals = [ebb1, ebb2, ebb4];
let comp = ();
assert_eq!(comp.search(ebb1, &vals), Ok(0));
assert_eq!(comp.search(ebb3, &vals), Err(2));
assert_eq!(comp.search(ebb4, &vals), Ok(2));
}
#[test]
fn slice_insertion() {
let mut a = ['a', 'b', 'c', 'd'];
slice_insert(&mut a[0..1], 0, 'e');
assert_eq!(a, ['e', 'b', 'c', 'd']);
slice_insert(&mut a, 0, 'a');
assert_eq!(a, ['a', 'e', 'b', 'c']);
slice_insert(&mut a, 3, 'g');
assert_eq!(a, ['a', 'e', 'b', 'g']);
slice_insert(&mut a, 1, 'h');
assert_eq!(a, ['a', 'h', 'e', 'b']);
}
#[test]
fn slice_shifting() {
let mut a = ['a', 'b', 'c', 'd'];
slice_shift(&mut a[0..1], 1);
assert_eq!(a, ['a', 'b', 'c', 'd']);
slice_shift(&mut a[1..], 1);
assert_eq!(a, ['a', 'c', 'd', 'd']);
slice_shift(&mut a, 2);
assert_eq!(a, ['d', 'd', 'd', 'd']);
}
}

View File

@@ -0,0 +1,814 @@
//! B+-tree nodes.
use super::{slice_insert, slice_shift, Forest, Node, SetValue, INNER_SIZE};
use std::borrow::{Borrow, BorrowMut};
use std::fmt;
/// B+-tree node.
///
/// A B+-tree has different node types for inner nodes and leaf nodes. Inner nodes contain M node
/// references and M-1 keys while leaf nodes contain N keys and values. Values for M and N are
/// chosen such that a node is exactly 64 bytes (a cache line) when keys and values are 32 bits
/// each.
///
/// An inner node contains at least M/2 node references unless it is the right-most node at its
/// level. A leaf node contains at least N/2 keys unless it is the right-most leaf.
pub(super) enum NodeData<F: Forest> {
Inner {
/// The number of keys in this node.
/// The number of node references is always one more.
size: u8,
/// Keys discriminating sub-trees.
///
/// The key in `keys[i]` is greater than all keys in `tree[i]` and less than or equal to
/// all keys in `tree[i+1]`.
keys: [F::Key; INNER_SIZE - 1],
/// Sub-trees.
tree: [Node; INNER_SIZE],
},
Leaf {
/// Number of key-value pairs in this node.
size: u8,
// Key array.
keys: F::LeafKeys,
// Value array.
vals: F::LeafValues,
},
/// An unused node on the free list.
Free { next: Option<Node> },
}
// Implement `Clone` and `Copy` manually, because deriving them would also require `Forest` to
// implement `Clone`.
impl<F: Forest> Copy for NodeData<F> {}
impl<F: Forest> Clone for NodeData<F> {
fn clone(&self) -> Self {
*self
}
}
impl<F: Forest> NodeData<F> {
/// Is this a free/unused node?
pub fn is_free(&self) -> bool {
match *self {
NodeData::Free { .. } => true,
_ => false,
}
}
/// Get the number of entries in this node.
///
/// This is the number of outgoing edges in an inner node, or the number of key-value pairs in
/// a leaf node.
pub fn entries(&self) -> usize {
match *self {
NodeData::Inner { size, .. } => usize::from(size) + 1,
NodeData::Leaf { size, .. } => usize::from(size),
NodeData::Free { .. } => panic!("freed node"),
}
}
/// Create an inner node with a single key and two sub-trees.
pub fn inner(left: Node, key: F::Key, right: Node) -> NodeData<F> {
// Splat the key and right node to the whole array.
// Saves us from inventing a default/reserved value.
let mut tree = [right; INNER_SIZE];
tree[0] = left;
NodeData::Inner {
size: 1,
keys: [key; INNER_SIZE - 1],
tree,
}
}
/// Create a leaf node with a single key-value pair.
pub fn leaf(key: F::Key, value: F::Value) -> NodeData<F> {
NodeData::Leaf {
size: 1,
keys: F::splat_key(key),
vals: F::splat_value(value),
}
}
/// Unwrap an inner node into two slices (keys, trees).
pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) {
match *self {
NodeData::Inner {
size,
ref keys,
ref tree,
} => {
let size = usize::from(size);
// TODO: We could probably use `get_unchecked()` here since `size` is always in
// range.
(&keys[0..size], &tree[0..size + 1])
}
_ => panic!("Expected inner node"),
}
}
/// Unwrap a leaf node into two slices (keys, values) of the same length.
pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) {
match *self {
NodeData::Leaf {
size,
ref keys,
ref vals,
} => {
let size = usize::from(size);
let keys = keys.borrow();
let vals = vals.borrow();
// TODO: We could probably use `get_unchecked()` here since `size` is always in
// range.
(&keys[0..size], &vals[0..size])
}
_ => panic!("Expected leaf node"),
}
}
/// Unwrap a mutable leaf node into two slices (keys, values) of the same length.
pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) {
match *self {
NodeData::Leaf {
size,
ref mut keys,
ref mut vals,
} => {
let size = usize::from(size);
let keys = keys.borrow_mut();
let vals = vals.borrow_mut();
// TODO: We could probably use `get_unchecked_mut()` here since `size` is always in
// range.
(&mut keys[0..size], &mut vals[0..size])
}
_ => panic!("Expected leaf node"),
}
}
/// Get the critical key for a leaf node.
/// This is simply the first key.
pub fn leaf_crit_key(&self) -> F::Key {
match *self {
NodeData::Leaf { size, ref keys, .. } => {
debug_assert!(size > 0, "Empty leaf node");
keys.borrow()[0]
}
_ => panic!("Expected leaf node"),
}
}
/// Try to insert `(key, node)` at key-position `index` in an inner node.
/// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`.
/// If the node is full, this leaves the node unchanged and returns false.
pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool {
match *self {
NodeData::Inner {
ref mut size,
ref mut keys,
ref mut tree,
} => {
let sz = usize::from(*size);
debug_assert!(sz <= keys.len());
debug_assert!(index <= sz, "Can't insert at {} with {} keys", index, sz);
if let Some(ks) = keys.get_mut(0..sz + 1) {
*size = (sz + 1) as u8;
slice_insert(ks, index, key);
slice_insert(&mut tree[1..sz + 2], index, node);
true
} else {
false
}
}
_ => panic!("Expected inner node"),
}
}
/// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node
/// is full.
pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool {
match *self {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
} => {
let sz = usize::from(*size);
let keys = keys.borrow_mut();
let vals = vals.borrow_mut();
debug_assert!(sz <= keys.len());
debug_assert!(index <= sz);
if let Some(ks) = keys.get_mut(0..sz + 1) {
*size = (sz + 1) as u8;
slice_insert(ks, index, key);
slice_insert(&mut vals[0..sz + 1], index, value);
true
} else {
false
}
}
_ => panic!("Expected leaf node"),
}
}
/// Split off the second half of this node.
/// It is assumed that this a completely full inner or leaf node.
///
/// The `insert_index` parameter is the position where an insertion was tried and failed. The
/// node will be split in half with a bias towards an even split after the insertion is retried.
pub fn split(&mut self, insert_index: usize) -> SplitOff<F> {
match *self {
NodeData::Inner {
ref mut size,
ref keys,
ref tree,
} => {
debug_assert_eq!(usize::from(*size), keys.len(), "Node not full");
// Number of tree entries in the lhs node.
let l_ents = split_pos(tree.len(), insert_index + 1);
let r_ents = tree.len() - l_ents;
// With INNER_SIZE=8, we get l_ents=4 and:
//
// self: [ n0 k0 n1 k1 n2 k2 n3 k3 n4 k4 n5 k5 n6 k6 n7 ]
// lhs: [ n0 k0 n1 k1 n2 k2 n3 ]
// crit_key = k3 (not present in either node)
// rhs: [ n4 k4 n5 k5 n6 k6 n7 ]
// 1. Truncate the LHS.
*size = (l_ents - 1) as u8;
// 2. Copy second half to `rhs_data`.
let mut r_keys = *keys;
r_keys[0..r_ents - 1].copy_from_slice(&keys[l_ents..]);
let mut r_tree = *tree;
r_tree[0..r_ents].copy_from_slice(&tree[l_ents..]);
SplitOff {
lhs_entries: l_ents,
rhs_entries: r_ents,
crit_key: keys[l_ents - 1],
rhs_data: NodeData::Inner {
size: (r_ents - 1) as u8,
keys: r_keys,
tree: r_tree,
},
}
}
NodeData::Leaf {
ref mut size,
ref keys,
ref vals,
} => {
let o_keys = keys.borrow();
let o_vals = vals.borrow();
debug_assert_eq!(usize::from(*size), o_keys.len(), "Node not full");
let l_size = split_pos(o_keys.len(), insert_index);
let r_size = o_keys.len() - l_size;
// 1. Truncate the LHS node at `l_size`.
*size = l_size as u8;
// 2. Copy second half to `rhs_data`.
let mut r_keys = *keys;
r_keys.borrow_mut()[0..r_size].copy_from_slice(&o_keys[l_size..]);
let mut r_vals = *vals;
r_vals.borrow_mut()[0..r_size].copy_from_slice(&o_vals[l_size..]);
SplitOff {
lhs_entries: l_size,
rhs_entries: r_size,
crit_key: o_keys[l_size],
rhs_data: NodeData::Leaf {
size: r_size as u8,
keys: r_keys,
vals: r_vals,
},
}
}
_ => panic!("Expected leaf node"),
}
}
/// Remove the sub-tree at `index` from this inner node.
///
/// Note that `index` refers to a sub-tree entry and not a key entry as it does for
/// `try_inner_insert()`. It is possible to remove the first sub-tree (which can't be inserted
/// by `try_inner_insert()`).
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn inner_remove(&mut self, index: usize) -> Removed {
match *self {
NodeData::Inner {
ref mut size,
ref mut keys,
ref mut tree,
} => {
let ents = usize::from(*size) + 1;
debug_assert!(ents <= tree.len());
debug_assert!(index < ents);
// Leave an invalid 0xff size when node becomes empty.
*size = ents.wrapping_sub(2) as u8;
if ents > 1 {
slice_shift(&mut keys[index.saturating_sub(1)..ents - 1], 1);
}
slice_shift(&mut tree[index..ents], 1);
Removed::new(index, ents - 1, tree.len())
}
_ => panic!("Expected inner node"),
}
}
/// Remove the key-value pair at `index` from this leaf node.
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn leaf_remove(&mut self, index: usize) -> Removed {
match *self {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
} => {
let sz = usize::from(*size);
let keys = keys.borrow_mut();
let vals = vals.borrow_mut();
*size -= 1;
slice_shift(&mut keys[index..sz], 1);
slice_shift(&mut vals[index..sz], 1);
Removed::new(index, sz - 1, keys.len())
}
_ => panic!("Expected leaf node"),
}
}
/// Balance this node with its right sibling.
///
/// It is assumed that the current node has underflowed. Look at the right sibling node and do
/// one of two things:
///
/// 1. Move all entries to the right node, leaving this node empty, or
/// 2. Distribute entries evenly between the two nodes.
///
/// In the first case, `None` is returned. In the second case, the new critical key for the
/// right sibling node is returned.
pub fn balance(&mut self, crit_key: F::Key, rhs: &mut NodeData<F>) -> Option<F::Key> {
match (self, rhs) {
(&mut NodeData::Inner {
size: ref mut l_size,
keys: ref mut l_keys,
tree: ref mut l_tree,
},
&mut NodeData::Inner {
size: ref mut r_size,
keys: ref mut r_keys,
tree: ref mut r_tree,
}) => {
let l_ents = usize::from(*l_size) + 1;
let r_ents = usize::from(*r_size) + 1;
let ents = l_ents + r_ents;
if ents <= r_tree.len() {
// All entries will fit in the RHS node.
// We'll leave the LHS node empty, but first use it as a scratch space.
*l_size = 0;
// Insert `crit_key` between the two nodes.
l_keys[l_ents - 1] = crit_key;
l_keys[l_ents..ents - 1].copy_from_slice(&r_keys[0..r_ents - 1]);
r_keys[0..ents - 1].copy_from_slice(&l_keys[0..ents - 1]);
l_tree[l_ents..ents].copy_from_slice(&r_tree[0..r_ents]);
r_tree[0..ents].copy_from_slice(&l_tree[0..ents]);
*r_size = (ents - 1) as u8;
None
} else {
// The entries don't all fit in one node. Distribute some from RHS -> LHS.
// Split evenly with a bias to putting one entry in LHS.
let r_goal = ents / 2;
let l_goal = ents - r_goal;
debug_assert!(l_goal > l_ents, "Node must be underflowed");
l_keys[l_ents - 1] = crit_key;
l_keys[l_ents..l_goal - 1].copy_from_slice(&r_keys[0..l_goal - 1 - l_ents]);
l_tree[l_ents..l_goal].copy_from_slice(&r_tree[0..l_goal - l_ents]);
*l_size = (l_goal - 1) as u8;
let new_crit = r_keys[r_ents - r_goal - 1];
slice_shift(&mut r_keys[0..r_ents - 1], r_ents - r_goal);
slice_shift(&mut r_tree[0..r_ents], r_ents - r_goal);
*r_size = (r_goal - 1) as u8;
Some(new_crit)
}
}
(&mut NodeData::Leaf {
size: ref mut l_size,
keys: ref mut l_keys,
vals: ref mut l_vals,
},
&mut NodeData::Leaf {
size: ref mut r_size,
keys: ref mut r_keys,
vals: ref mut r_vals,
}) => {
let l_ents = usize::from(*l_size);
let l_keys = l_keys.borrow_mut();
let l_vals = l_vals.borrow_mut();
let r_ents = usize::from(*r_size);
let r_keys = r_keys.borrow_mut();
let r_vals = r_vals.borrow_mut();
let ents = l_ents + r_ents;
if ents <= r_vals.len() {
// We can fit all entries in the RHS node.
// We'll leave the LHS node empty, but first use it as a scratch space.
*l_size = 0;
l_keys[l_ents..ents].copy_from_slice(&r_keys[0..r_ents]);
r_keys[0..ents].copy_from_slice(&l_keys[0..ents]);
l_vals[l_ents..ents].copy_from_slice(&r_vals[0..r_ents]);
r_vals[0..ents].copy_from_slice(&l_vals[0..ents]);
*r_size = ents as u8;
None
} else {
// The entries don't all fit in one node. Distribute some from RHS -> LHS.
// Split evenly with a bias to putting one entry in LHS.
let r_goal = ents / 2;
let l_goal = ents - r_goal;
debug_assert!(l_goal > l_ents, "Node must be underflowed");
l_keys[l_ents..l_goal].copy_from_slice(&r_keys[0..l_goal - l_ents]);
l_vals[l_ents..l_goal].copy_from_slice(&r_vals[0..l_goal - l_ents]);
*l_size = l_goal as u8;
slice_shift(&mut r_keys[0..r_ents], r_ents - r_goal);
slice_shift(&mut r_vals[0..r_ents], r_ents - r_goal);
*r_size = r_goal as u8;
Some(r_keys[0])
}
}
_ => panic!("Mismatched nodes"),
}
}
}
/// Find the right split position for halving a full node with `len` entries to recover from a
/// failed insertion at `ins`.
///
/// If `len` is even, we should split straight down the middle regardless of `len`.
///
/// If `len` is odd, we should split the node such that the two halves are the same size after the
/// insertion is retried.
fn split_pos(len: usize, ins: usize) -> usize {
// Anticipate `len` being a compile time constant, so this all folds away when `len` is even.
if ins <= len / 2 {
len / 2
} else {
(len + 1) / 2
}
}
/// The result of splitting off the second half of a node.
pub(super) struct SplitOff<F: Forest> {
/// The number of entries left in the original node which becomes the left-hand-side of the
/// pair. This is the number of outgoing node edges for an inner node, and the number of
/// key-value pairs for a leaf node.
pub lhs_entries: usize,
/// The number of entries in the new RHS node.
pub rhs_entries: usize,
/// The critical key separating the LHS and RHS nodes. All keys in the LHS sub-tree are less
/// than the critical key, and all entries in the RHS sub-tree are greater or equal to the
/// critical key.
pub crit_key: F::Key,
/// The RHS node data containing the elements that were removed from the original node (now the
/// LHS).
pub rhs_data: NodeData<F>,
}
/// The result of removing an entry from a node.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(super) enum Removed {
/// An entry was removed, and the node is still in good shape.
Healthy,
/// The node is in good shape after removing the rightmost element.
Rightmost,
/// The node has too few entries now, and it should be balanced with a sibling node.
Underflow,
/// The last entry was removed. For an inner node, this means that the `keys` array is empty
/// and there is just a single sub-tree left.
Empty,
}
impl Removed {
/// Create a `Removed` status from a size and capacity.
fn new(removed: usize, new_size: usize, capacity: usize) -> Removed {
if 2 * new_size >= capacity {
if removed == new_size {
Removed::Rightmost
} else {
Removed::Healthy
}
} else if new_size > 0 {
Removed::Underflow
} else {
Removed::Empty
}
}
}
// Display ": value" or nothing at all for `()`.
pub(super) trait ValDisp {
fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result;
}
impl ValDisp for SetValue {
fn valfmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
Ok(())
}
}
impl<T: fmt::Display> ValDisp for T {
fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, ":{}", self)
}
}
impl<F> fmt::Display for NodeData<F>
where
F: Forest,
F::Key: fmt::Display,
F::Value: ValDisp,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
NodeData::Inner { size, keys, tree } => {
write!(f, "[ {}", tree[0])?;
for i in 0..usize::from(size) {
write!(f, " {} {}", keys[i], tree[i + 1])?;
}
write!(f, " ]")
}
NodeData::Leaf { size, keys, vals } => {
let keys = keys.borrow();
let vals = vals.borrow();
write!(f, "[")?;
for i in 0..usize::from(size) {
write!(f, " {}", keys[i])?;
vals[i].valfmt(f)?;
}
write!(f, " ]")
}
NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
NodeData::Free { next: None } => write!(f, "[ free ]"),
}
}
}
#[cfg(test)]
mod test {
use super::*;
use std::mem;
use std::string::ToString;
// Forest impl for a set implementation.
struct TF();
impl Forest for TF {
type Key = char;
type Value = SetValue;
type LeafKeys = [char; 15];
type LeafValues = [SetValue; 15];
type Comparator = ();
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; 15]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; 15]
}
}
#[test]
fn inner() {
let n1 = Node(1);
let n2 = Node(2);
let n3 = Node(3);
let n4 = Node(4);
let mut inner = NodeData::<TF>::inner(n1, 'c', n4);
assert_eq!(mem::size_of_val(&inner), 64);
assert_eq!(inner.to_string(), "[ node1 c node4 ]");
assert!(inner.try_inner_insert(0, 'a', n2));
assert_eq!(inner.to_string(), "[ node1 a node2 c node4 ]");
assert!(inner.try_inner_insert(1, 'b', n3));
assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
for i in 3..7 {
assert!(inner.try_inner_insert(
usize::from(i),
('a' as u8 + i) as char,
Node(i as u32 + 2),
));
}
assert_eq!(
inner.to_string(),
"[ node1 a node2 b node3 c node4 d node5 e node6 f node7 g node8 ]"
);
// Now the node is full and insertion should fail anywhere.
assert!(!inner.try_inner_insert(0, 'x', n3));
assert!(!inner.try_inner_insert(4, 'x', n3));
assert!(!inner.try_inner_insert(7, 'x', n3));
// Splitting should be independent of the hint because we have an even number of node
// references.
let saved = inner.clone();
let sp = inner.split(1);
assert_eq!(sp.lhs_entries, 4);
assert_eq!(sp.rhs_entries, 4);
assert_eq!(sp.crit_key, 'd');
// The critical key is not present in either of the resulting nodes.
assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");
assert_eq!(inner.inner_remove(0), Removed::Underflow);
assert_eq!(inner.to_string(), "[ node2 b node3 c node4 ]");
assert_eq!(inner.inner_remove(1), Removed::Underflow);
assert_eq!(inner.to_string(), "[ node2 c node4 ]");
assert_eq!(inner.inner_remove(1), Removed::Underflow);
assert_eq!(inner.to_string(), "[ node2 ]");
assert_eq!(inner.inner_remove(0), Removed::Empty);
inner = saved;
let sp = inner.split(6);
assert_eq!(sp.lhs_entries, 4);
assert_eq!(sp.rhs_entries, 4);
assert_eq!(sp.crit_key, 'd');
assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");
}
#[test]
fn leaf() {
let mut leaf = NodeData::<TF>::leaf('d', SetValue());
assert_eq!(leaf.to_string(), "[ d ]");
assert!(leaf.try_leaf_insert(0, 'a', SetValue()));
assert_eq!(leaf.to_string(), "[ a d ]");
assert!(leaf.try_leaf_insert(1, 'b', SetValue()));
assert!(leaf.try_leaf_insert(2, 'c', SetValue()));
assert_eq!(leaf.to_string(), "[ a b c d ]");
for i in 4..15 {
assert!(leaf.try_leaf_insert(
usize::from(i),
('a' as u8 + i) as char,
SetValue(),
));
}
assert_eq!(leaf.to_string(), "[ a b c d e f g h i j k l m n o ]");
// Now the node is full and insertion should fail anywhere.
assert!(!leaf.try_leaf_insert(0, 'x', SetValue()));
assert!(!leaf.try_leaf_insert(8, 'x', SetValue()));
assert!(!leaf.try_leaf_insert(15, 'x', SetValue()));
// The index given to `split` is not the split position, it's a hint for balancing the node.
let saved = leaf.clone();
let sp = leaf.split(12);
assert_eq!(sp.lhs_entries, 8);
assert_eq!(sp.rhs_entries, 7);
assert_eq!(sp.crit_key, 'i');
assert_eq!(leaf.to_string(), "[ a b c d e f g h ]");
assert_eq!(sp.rhs_data.to_string(), "[ i j k l m n o ]");
assert!(leaf.try_leaf_insert(8, 'i', SetValue()));
assert_eq!(leaf.leaf_remove(2), Removed::Healthy);
assert_eq!(leaf.to_string(), "[ a b d e f g h i ]");
assert_eq!(leaf.leaf_remove(7), Removed::Underflow);
assert_eq!(leaf.to_string(), "[ a b d e f g h ]");
leaf = saved;
let sp = leaf.split(7);
assert_eq!(sp.lhs_entries, 7);
assert_eq!(sp.rhs_entries, 8);
assert_eq!(sp.crit_key, 'h');
assert_eq!(leaf.to_string(), "[ a b c d e f g ]");
assert_eq!(sp.rhs_data.to_string(), "[ h i j k l m n o ]");
}
#[test]
fn optimal_split_pos() {
// An even split is easy.
assert_eq!(split_pos(8, 0), 4);
assert_eq!(split_pos(8, 8), 4);
// Easy cases for odd splits.
assert_eq!(split_pos(7, 0), 3);
assert_eq!(split_pos(7, 7), 4);
// If the insertion point is the same as the split position, we
// will append to the lhs node.
assert_eq!(split_pos(7, 3), 3);
assert_eq!(split_pos(7, 4), 4);
}
#[test]
fn inner_balance() {
let n1 = Node(1);
let n2 = Node(2);
let n3 = Node(3);
let mut lhs = NodeData::<TF>::inner(n1, 'a', n2);
assert!(lhs.try_inner_insert(1, 'b', n3));
assert_eq!(lhs.to_string(), "[ node1 a node2 b node3 ]");
let n11 = Node(11);
let n12 = Node(12);
let mut rhs = NodeData::<TF>::inner(n11, 'p', n12);
for i in 1..4 {
assert!(rhs.try_inner_insert(
usize::from(i),
('p' as u8 + i) as char,
Node(i as u32 + 12),
));
}
assert_eq!(
rhs.to_string(),
"[ node11 p node12 q node13 r node14 s node15 ]"
);
// 3+5 elements fit in RHS.
assert_eq!(lhs.balance('o', &mut rhs), None);
assert_eq!(
rhs.to_string(),
"[ node1 a node2 b node3 o node11 p node12 q node13 r node14 s node15 ]"
);
// 2+8 elements are redistributed.
lhs = NodeData::<TF>::inner(Node(20), 'x', Node(21));
assert_eq!(lhs.balance('y', &mut rhs), Some('o'));
assert_eq!(
lhs.to_string(),
"[ node20 x node21 y node1 a node2 b node3 ]"
);
assert_eq!(
rhs.to_string(),
"[ node11 p node12 q node13 r node14 s node15 ]"
);
}
#[test]
fn leaf_balance() {
let mut lhs = NodeData::<TF>::leaf('a', SetValue());
for i in 1..6 {
assert!(lhs.try_leaf_insert(
usize::from(i),
('a' as u8 + i) as char,
SetValue(),
));
}
assert_eq!(lhs.to_string(), "[ a b c d e f ]");
let mut rhs = NodeData::<TF>::leaf('0', SetValue());
for i in 1..8 {
assert!(rhs.try_leaf_insert(
usize::from(i),
('0' as u8 + i) as char,
SetValue(),
));
}
assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
// 6+8 elements all fits in rhs.
assert_eq!(lhs.balance('0', &mut rhs), None);
assert_eq!(rhs.to_string(), "[ a b c d e f 0 1 2 3 4 5 6 7 ]");
assert!(lhs.try_leaf_insert(0, 'x', SetValue()));
assert!(lhs.try_leaf_insert(1, 'y', SetValue()));
assert!(lhs.try_leaf_insert(2, 'z', SetValue()));
assert_eq!(lhs.to_string(), "[ x y z ]");
// 3+14 elements need redistribution.
assert_eq!(lhs.balance('a', &mut rhs), Some('0'));
assert_eq!(lhs.to_string(), "[ x y z a b c d e f ]");
assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
}
}

View File

@@ -0,0 +1,832 @@
//! A path from the root of a B+-tree to a leaf node.
use super::node::Removed;
use super::{slice_insert, slice_shift, Comparator, Forest, Node, NodeData, NodePool, MAX_PATH};
use std::borrow::Borrow;
use std::marker::PhantomData;
#[cfg(test)]
use std::fmt;
pub(super) struct Path<F: Forest> {
/// Number of path entries including the root and leaf nodes.
size: usize,
/// Path of node references from the root to a leaf node.
node: [Node; MAX_PATH],
/// Entry number in each node.
entry: [u8; MAX_PATH],
unused: PhantomData<F>,
}
impl<F: Forest> Default for Path<F> {
fn default() -> Path<F> {
Path {
size: 0,
node: [Node(0); MAX_PATH],
entry: [0; MAX_PATH],
unused: PhantomData,
}
}
}
impl<F: Forest> Path<F> {
/// Reset path by searching for `key` starting from `root`.
///
/// If `key` is in the tree, returns the corresponding value and leaved the path pointing at
/// the entry. Otherwise returns `None` and:
///
/// - A key smaller than all stored keys returns a path to the first entry of the first leaf.
/// - A key larger than all stored keys returns a path to one beyond the last element of the
/// last leaf.
/// - A key between the stored keys of adjacent leaf nodes returns a path to one beyond the
/// last entry of the first of the leaf nodes.
///
pub fn find(
&mut self,
key: F::Key,
root: Node,
pool: &NodePool<F>,
comp: &F::Comparator,
) -> Option<F::Value> {
let mut node = root;
for level in 0.. {
self.size = level + 1;
self.node[level] = node;
match pool[node] {
NodeData::Inner { size, keys, tree } => {
// Invariant: `tree[i]` contains keys smaller than
// `keys[i]`, greater or equal to `keys[i-1]`.
let i = match comp.search(key, &keys[0..size.into()]) {
// We hit an existing key, so follow the >= branch.
Ok(i) => i + 1,
// Key is less than `keys[i]`, so follow the < branch.
Err(i) => i,
};
self.entry[level] = i as u8;
node = tree[i];
}
NodeData::Leaf { size, keys, vals } => {
// For a leaf we want either the found key or an insert position.
return match comp.search(key, &keys.borrow()[0..size.into()]) {
Ok(i) => {
self.entry[level] = i as u8;
Some(vals.borrow()[i])
}
Err(i) => {
self.entry[level] = i as u8;
None
}
};
}
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
unreachable!();
}
/// Move path to the first entry of the tree starting at `root` and return it.
pub fn first(&mut self, root: Node, pool: &NodePool<F>) -> (F::Key, F::Value) {
let mut node = root;
for level in 0.. {
self.size = level + 1;
self.node[level] = node;
self.entry[level] = 0;
match pool[node] {
NodeData::Inner { tree, .. } => node = tree[0],
NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
unreachable!();
}
/// Move this path to the next key-value pair and return it.
pub fn next(&mut self, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
match self.leaf_pos() {
None => return None,
Some((node, entry)) => {
let (keys, vals) = pool[node].unwrap_leaf();
if entry + 1 < keys.len() {
self.entry[self.size - 1] += 1;
return Some((keys[entry + 1], vals[entry + 1]));
}
}
}
// The current leaf node is exhausted. Move to the next one.
let leaf_level = self.size - 1;
self.next_node(leaf_level, pool).map(|node| {
let (keys, vals) = pool[node].unwrap_leaf();
(keys[0], vals[0])
})
}
/// Move this path to the previous key-value pair and return it.
///
/// If the path is at the off-the-end position, go to the last key-value pair.
///
/// If the path is already at the first key-value pair, leave it there and return `None`.
pub fn prev(&mut self, root: Node, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
// We use `size == 0` as a generic off-the-end position.
if self.size == 0 {
self.goto_subtree_last(0, root, pool);
let (node, entry) = self.leaf_pos().unwrap();
let (keys, vals) = pool[node].unwrap_leaf();
return Some((keys[entry], vals[entry]));
}
match self.leaf_pos() {
None => return None,
Some((node, entry)) => {
if entry > 0 {
self.entry[self.size - 1] -= 1;
let (keys, vals) = pool[node].unwrap_leaf();
return Some((keys[entry - 1], vals[entry - 1]));
}
}
}
// The current leaf node is exhausted. Move to the previous one.
self.prev_leaf(pool).map(|node| {
let (keys, vals) = pool[node].unwrap_leaf();
let e = self.leaf_entry();
(keys[e], vals[e])
})
}
/// Move path to the first entry of the next node at level, if one exists.
///
/// Returns the new node if it exists.
///
/// Reset the path to `size = 0` and return `None` if there is no next node.
fn next_node(&mut self, level: usize, pool: &NodePool<F>) -> Option<Node> {
match self.right_sibling_branch_level(level, pool) {
None => {
self.size = 0;
None
}
Some(bl) => {
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
self.entry[bl] += 1;
let mut node = bnodes[usize::from(self.entry[bl])];
for l in bl + 1..level {
self.node[l] = node;
self.entry[l] = 0;
node = pool[node].unwrap_inner().1[0];
}
self.node[level] = node;
self.entry[level] = 0;
Some(node)
}
}
}
/// Move the path to the last entry of the previous leaf node, if one exists.
///
/// Returns the new leaf node if it exists.
///
/// Leave the path unchanged and returns `None` if we are already at the first leaf node.
fn prev_leaf(&mut self, pool: &NodePool<F>) -> Option<Node> {
self.left_sibling_branch_level(self.size - 1).map(|bl| {
let entry = self.entry[bl] - 1;
self.entry[bl] = entry;
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
self.goto_subtree_last(bl + 1, bnodes[usize::from(entry)], pool)
})
}
/// Move this path to the last position for the sub-tree at `level, root`.
fn goto_subtree_last(&mut self, level: usize, root: Node, pool: &NodePool<F>) -> Node {
let mut node = root;
for l in level.. {
self.node[l] = node;
match pool[node] {
NodeData::Inner { size, ref tree, .. } => {
self.entry[l] = size;
node = tree[usize::from(size)];
}
NodeData::Leaf { size, .. } => {
self.entry[l] = size - 1;
self.size = l + 1;
break;
}
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
node
}
/// Set the root node and point the path at the first entry of the node.
pub fn set_root_node(&mut self, root: Node) {
self.size = 1;
self.node[0] = root;
self.entry[0] = 0;
}
/// Get the current leaf node and entry, if any.
pub fn leaf_pos(&self) -> Option<(Node, usize)> {
let i = self.size.wrapping_sub(1);
self.node.get(i).map(|&n| (n, self.entry[i].into()))
}
/// Get the current leaf node.
fn leaf_node(&self) -> Node {
self.node[self.size - 1]
}
/// Get the current entry in the leaf node.
fn leaf_entry(&self) -> usize {
self.entry[self.size - 1].into()
}
/// Is this path pointing to the first entry in the tree?
/// This corresponds to the smallest key.
fn at_first_entry(&self) -> bool {
self.entry[0..self.size].iter().all(|&i| i == 0)
}
/// Get a mutable reference to the current value.
/// This assumes that there is a current value.
pub fn value_mut<'a>(&self, pool: &'a mut NodePool<F>) -> &'a mut F::Value {
&mut pool[self.leaf_node()].unwrap_leaf_mut().1[self.leaf_entry()]
}
/// Insert the key-value pair at the current position.
/// The current position must be the correct insertion location for the key.
/// This function does not check for duplicate keys. Use `find` or similar for that.
/// Returns the new root node.
pub fn insert(&mut self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> Node {
if !self.try_leaf_insert(key, value, pool) {
self.split_and_insert(key, value, pool);
}
self.node[0]
}
/// Try to insert `key, value` at the current position, but fail and return false if the leaf
/// node is full.
fn try_leaf_insert(&self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> bool {
let index = self.leaf_entry();
// The case `index == 0` should only ever happen when there are no earlier leaf nodes,
// otherwise we should have appended to the previous leaf node instead. This invariant
// means that we don't need to update keys stored in inner nodes here.
debug_assert!(index > 0 || self.at_first_entry());
pool[self.leaf_node()].try_leaf_insert(index, key, value)
}
/// Split the current leaf node and then insert `key, value`.
/// This should only be used if `try_leaf_insert()` fails.
fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool<F>) {
let orig_root = self.node[0];
// Loop invariant: We need to split the node at `level` and then retry a failed insertion.
// The items to insert are either `(key, ins_node)` or `(key, value)`.
let mut ins_node = None;
let mut split;
for level in (0..self.size).rev() {
// Split the current node.
let mut node = self.node[level];
let mut entry = self.entry[level].into();
split = pool[node].split(entry);
let rhs_node = pool.alloc_node(split.rhs_data);
// Should the path be moved to the new RHS node?
// Prefer the smaller node if we're right in the middle.
// Prefer to append to LHS all other things being equal.
//
// When inserting into an inner node (`ins_node.is_some()`), we must point to a valid
// entry in the current node since the new entry is inserted *after* the insert
// location.
if entry > split.lhs_entries ||
(entry == split.lhs_entries &&
(split.lhs_entries > split.rhs_entries || ins_node.is_some()))
{
node = rhs_node;
entry -= split.lhs_entries;
self.node[level] = node;
self.entry[level] = entry as u8;
}
// Now that we have a not-full node, it must be possible to insert.
match ins_node {
None => {
let inserted = pool[node].try_leaf_insert(entry, key, value);
debug_assert!(inserted);
// If we inserted at the front of the new rhs_node leaf, we need to propagate
// the inserted key as the critical key instead of the previous front key.
if entry == 0 && node == rhs_node {
split.crit_key = key;
}
}
Some(n) => {
let inserted = pool[node].try_inner_insert(entry, key, n);
debug_assert!(inserted);
// The lower level was moved to the new RHS node, so make sure that is
// reflected here.
if n == self.node[level + 1] {
self.entry[level] += 1;
}
}
}
// We are now done with the current level, but `rhs_node` must be inserted in the inner
// node above us. If we're already at level 0, the root node needs to be split.
key = split.crit_key;
ins_node = Some(rhs_node);
if level > 0 {
let pnode = &mut pool[self.node[level - 1]];
let pentry = self.entry[level - 1].into();
if pnode.try_inner_insert(pentry, key, rhs_node) {
// If this level level was moved to the new RHS node, update parent entry.
if node == rhs_node {
self.entry[level - 1] += 1;
}
return;
}
}
}
// If we get here we have split the original root node and need to add an extra level.
let rhs_node = ins_node.expect("empty path");
let root = pool.alloc_node(NodeData::inner(orig_root, key, rhs_node));
let entry = if self.node[0] == rhs_node { 1 } else { 0 };
self.size += 1;
slice_insert(&mut self.node[0..self.size], 0, root);
slice_insert(&mut self.entry[0..self.size], 0, entry);
}
/// Remove the key-value pair at the current position and advance the path to the next
/// key-value pair, leaving the path in a normalized state.
///
/// Return the new root node.
pub fn remove(&mut self, pool: &mut NodePool<F>) -> Option<Node> {
let e = self.leaf_entry();
match pool[self.leaf_node()].leaf_remove(e) {
Removed::Healthy => {
if e == 0 {
self.update_crit_key(pool)
}
Some(self.node[0])
}
status => self.balance_nodes(status, pool),
}
}
/// Get the critical key for the current node at `level`.
///
/// The critical key is less than or equal to all keys in the sub-tree at `level` and greater
/// than all keys to the left of the current node at `level`.
///
/// The left-most node at any level does not have a critical key.
fn current_crit_key(&self, level: usize, pool: &NodePool<F>) -> Option<F::Key> {
// Find the level containing the critical key for the current node.
self.left_sibling_branch_level(level).map(|bl| {
let (keys, _) = pool[self.node[bl]].unwrap_inner();
keys[usize::from(self.entry[bl]) - 1]
})
}
/// Update the critical key after removing the front entry of the leaf node.
fn update_crit_key(&mut self, pool: &mut NodePool<F>) {
// Find the inner level containing the critical key for the current leaf node.
let crit_level = match self.left_sibling_branch_level(self.size - 1) {
None => return,
Some(l) => l,
};
let crit_kidx = self.entry[crit_level] - 1;
// Extract the new critical key from the leaf node.
let crit_key = pool[self.leaf_node()].leaf_crit_key();
let crit_node = self.node[crit_level];
match pool[crit_node] {
NodeData::Inner { size, ref mut keys, .. } => {
debug_assert!(crit_kidx < size);
keys[usize::from(crit_kidx)] = crit_key;
}
_ => panic!("Expected inner node"),
}
}
/// Given that the current leaf node is in an unhealthy (underflowed or even empty) status,
/// balance it with sibling nodes.
///
/// Return the new root node.
fn balance_nodes(&mut self, status: Removed, pool: &mut NodePool<F>) -> Option<Node> {
// The current leaf node is not in a healthy state, and its critical key may have changed
// too.
//
// Start by dealing with a changed critical key for the leaf level.
if status != Removed::Empty && self.leaf_entry() == 0 {
self.update_crit_key(pool);
}
let leaf_level = self.size - 1;
if self.heal_level(status, leaf_level, pool) {
// Tree has become empty.
self.size = 0;
return None;
}
// Discard the root node if it has shrunk to a single sub-tree.
let mut ns = 0;
while let NodeData::Inner { size: 0, ref tree, .. } = pool[self.node[ns]] {
ns += 1;
self.node[ns] = tree[0];
}
if ns > 0 {
for l in 0..ns {
pool.free_node(self.node[l]);
}
// Shift the whole array instead of just 0..size because `self.size` may be cleared
// here if the path is pointing off-the-end.
slice_shift(&mut self.node, ns);
slice_shift(&mut self.entry, ns);
if self.size > 0 {
self.size -= ns;
}
}
// Return the root node, even when `size=0` indicating that we're at the off-the-end
// position.
Some(self.node[0])
}
/// After removing an entry from the node at `level`, check its health and rebalance as needed.
///
/// Leave the path up to and including `level` in a normalized state where all entries are in
/// bounds.
///
/// Returns true if the tree becomes empty.
fn heal_level(&mut self, status: Removed, level: usize, pool: &mut NodePool<F>) -> bool {
match status {
Removed::Healthy => {}
Removed::Rightmost => {
// The rightmost entry was removed from the curent node, so move the path so it
// points at the first entry of the next node at this level.
debug_assert_eq!(
usize::from(self.entry[level]),
pool[self.node[level]].entries()
);
self.next_node(level, pool);
}
Removed::Underflow => self.underflowed_node(level, pool),
Removed::Empty => return self.empty_node(level, pool),
}
false
}
/// The current node at `level` has underflowed, meaning that it is below half capacity but
/// not completely empty.
///
/// Handle this by balancing entries with the right sibling node.
///
/// Leave the path up to and including `level` in a valid state that points to the same entry.
fn underflowed_node(&mut self, level: usize, pool: &mut NodePool<F>) {
// Look for a right sibling node at this level. If none exists, we allow the underflowed
// node to persist as the right-most node at its level.
if let Some((crit_key, rhs_node)) = self.right_sibling(level, pool) {
// New critical key for the updated right sibling node.
let new_ck: Option<F::Key>;
let empty;
// Make a COPY of the sibling node to avoid fighting the borrow checker.
let mut rhs = pool[rhs_node];
match pool[self.node[level]].balance(crit_key, &mut rhs) {
None => {
// Everything got moved to the RHS node.
new_ck = self.current_crit_key(level, pool);
empty = true;
}
Some(key) => {
// Entries moved from RHS node.
new_ck = Some(key);
empty = false;
}
}
// Put back the updated RHS node data.
pool[rhs_node] = rhs;
// Update the critical key for the RHS node unless it has become a left-most
// node.
if let Some(ck) = new_ck {
self.update_right_crit_key(level, ck, pool);
}
if empty {
let empty_tree = self.empty_node(level, pool);
debug_assert!(!empty_tree);
}
// Any Removed::Rightmost state must have been cleared above by merging nodes. If the
// current entry[level] was one off the end of the node, it will now point at a proper
// entry.
debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries());
} else if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
// There's no right sibling at this level, so the node can't be rebalanced.
// Check if we are in an off-the-end position.
self.size = 0;
}
}
/// The current node at `level` has become empty.
///
/// Remove the node from its parent node and leave the path in a normalized state. This means
/// that the path at this level will go through the right sibling of this node.
///
/// If the current node has no right sibling, set `self.size = 0`.
///
/// Returns true if the tree becomes empty.
fn empty_node(&mut self, level: usize, pool: &mut NodePool<F>) -> bool {
pool.free_node(self.node[level]);
if level == 0 {
// We just deleted the root node, so the tree is now empty.
return true;
}
// Get the right sibling node before recursively removing nodes.
let rhs_node = self.right_sibling(level, pool).map(|(_, n)| n);
// Remove the current sub-tree from the parent node.
let pl = level - 1;
let pe = self.entry[pl].into();
let status = pool[self.node[pl]].inner_remove(pe);
self.heal_level(status, pl, pool);
// Finally update the path at this level.
match rhs_node {
// We'll leave `self.entry[level]` unchanged. It can be non-zero after moving node
// entries to the right sibling node.
Some(rhs) => self.node[level] = rhs,
// We have no right sibling, so we must have deleted the right-most
// entry. The path should be moved to the "off-the-end" position.
None => self.size = 0,
}
false
}
/// Find the level where the right sibling to the current node at `level` branches off.
///
/// This will be an inner node with two adjacent sub-trees: In one the current node at level is
/// a right-most node, in the other, the right sibling is a left-most node.
///
/// Returns `None` if the current node is a right-most node so no right sibling exists.
fn right_sibling_branch_level(&self, level: usize, pool: &NodePool<F>) -> Option<usize> {
(0..level).rposition(|l| match pool[self.node[l]] {
NodeData::Inner { size, .. } => self.entry[l] < size,
_ => panic!("Expected inner node"),
})
}
/// Find the level where the left sibling to the current node at `level` branches off.
fn left_sibling_branch_level(&self, level: usize) -> Option<usize> {
self.entry[0..level].iter().rposition(|&e| e != 0)
}
/// Get the right sibling node to the current node at `level`.
/// Also return the critical key between the current node and the right sibling.
fn right_sibling(&self, level: usize, pool: &NodePool<F>) -> Option<(F::Key, Node)> {
// Find the critical level: The deepest level where two sibling subtrees contain the
// current node and its right sibling.
self.right_sibling_branch_level(level, pool).map(|bl| {
// Extract the critical key and the `bl+1` node.
let be = usize::from(self.entry[bl]);
let crit_key;
let mut node;
{
let (keys, tree) = pool[self.node[bl]].unwrap_inner();
crit_key = keys[be];
node = tree[be + 1];
}
// Follow left-most links back down to `level`.
for _ in bl + 1..level {
node = pool[node].unwrap_inner().1[0];
}
(crit_key, node)
})
}
/// Update the critical key for the right sibling node at `level`.
fn update_right_crit_key(&self, level: usize, crit_key: F::Key, pool: &mut NodePool<F>) {
let bl = self.right_sibling_branch_level(level, pool).expect(
"No right sibling exists",
);
match pool[self.node[bl]] {
NodeData::Inner { ref mut keys, .. } => {
keys[usize::from(self.entry[bl])] = crit_key;
}
_ => panic!("Expected inner node"),
}
}
/// Normalize the path position such that it is either pointing at a real entry or `size=0`
/// indicating "off-the-end".
pub fn normalize(&mut self, pool: &mut NodePool<F>) {
if let Some((leaf, entry)) = self.leaf_pos() {
if entry >= pool[leaf].entries() {
let leaf_level = self.size - 1;
self.next_node(leaf_level, pool);
}
}
}
}
#[cfg(test)]
impl<F: Forest> Path<F> {
/// Check the internal consistency of this path.
pub fn verify(&self, pool: &NodePool<F>) {
for level in 0..self.size {
match pool[self.node[level]] {
NodeData::Inner { size, tree, .. } => {
assert!(
level < self.size - 1,
"Expected leaf node at level {}",
level
);
assert!(
self.entry[level] <= size,
"OOB inner entry {}/{} at level {}",
self.entry[level],
size,
level
);
assert_eq!(
self.node[level + 1],
tree[usize::from(self.entry[level])],
"Node mismatch at level {}",
level
);
}
NodeData::Leaf { size, .. } => {
assert_eq!(level, self.size - 1, "Expected inner node");
assert!(
self.entry[level] <= size,
"OOB leaf entry {}/{}",
self.entry[level],
size,
);
}
NodeData::Free { .. } => {
panic!("Free {} in path", self.node[level]);
}
}
}
}
}
#[cfg(test)]
impl<F: Forest> fmt::Display for Path<F> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.size == 0 {
write!(f, "<empty path>")
} else {
write!(f, "{}[{}]", self.node[0], self.entry[0])?;
for i in 1..self.size {
write!(f, "--{}[{}]", self.node[i], self.entry[i])?;
}
Ok(())
}
}
}
#[cfg(test)]
mod test {
use super::super::{Forest, NodeData, NodePool};
use super::*;
use std::cmp::Ordering;
struct TC();
impl Comparator<i32> for TC {
fn cmp(&self, a: i32, b: i32) -> Ordering {
a.cmp(&b)
}
}
struct TF();
impl Forest for TF {
type Key = i32;
type Value = char;
type LeafKeys = [i32; 7];
type LeafValues = [char; 7];
type Comparator = TC;
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; 7]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; 7]
}
}
#[test]
fn search_single_leaf() {
// Testing Path::new() for trees with a single leaf node.
let mut pool = NodePool::<TF>::new();
let root = pool.alloc_node(NodeData::leaf(10, 'a'));
let mut p = Path::default();
let comp = TC();
// Search for key less than stored key.
assert_eq!(p.find(5, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
// Search for stored key.
assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
// Search for key greater than stored key.
assert_eq!(p.find(15, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
// Modify leaf node to contain two values.
match pool[root] {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
} => {
*size = 2;
keys[1] = 20;
vals[1] = 'b';
}
_ => unreachable!(),
}
// Search for key between stored keys.
assert_eq!(p.find(15, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
// Search for key greater than stored keys.
assert_eq!(p.find(25, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 2);
}
#[test]
fn search_single_inner() {
// Testing Path::new() for trees with a single inner node and two leaves.
let mut pool = NodePool::<TF>::new();
let leaf1 = pool.alloc_node(NodeData::leaf(10, 'a'));
let leaf2 = pool.alloc_node(NodeData::leaf(20, 'b'));
let root = pool.alloc_node(NodeData::inner(leaf1, 20, leaf2));
let mut p = Path::default();
let comp = TC();
// Search for key less than stored keys.
assert_eq!(p.find(5, root, &pool, &comp), None);
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
assert_eq!(p.node[1], leaf1);
assert_eq!(p.entry[1], 0);
assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
assert_eq!(p.node[1], leaf1);
assert_eq!(p.entry[1], 0);
// Midway between the two leaf nodes.
assert_eq!(p.find(15, root, &pool, &comp), None);
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
assert_eq!(p.node[1], leaf1);
assert_eq!(p.entry[1], 1);
assert_eq!(p.find(20, root, &pool, &comp), Some('b'));
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
assert_eq!(p.node[1], leaf2);
assert_eq!(p.entry[1], 0);
assert_eq!(p.find(25, root, &pool, &comp), None);
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
assert_eq!(p.node[1], leaf2);
assert_eq!(p.entry[1], 1);
}
}

View File

@@ -0,0 +1,213 @@
//! B+-tree node pool.
use super::{Forest, Node, NodeData};
use entity::PrimaryMap;
use std::ops::{Index, IndexMut};
/// A pool of nodes, including a free list.
pub(super) struct NodePool<F: Forest> {
nodes: PrimaryMap<Node, NodeData<F>>,
freelist: Option<Node>,
}
impl<F: Forest> NodePool<F> {
/// Allocate a new empty pool of nodes.
pub fn new() -> NodePool<F> {
NodePool {
nodes: PrimaryMap::new(),
freelist: None,
}
}
/// Free all nodes.
pub fn clear(&mut self) {
self.nodes.clear();
self.freelist = None;
}
/// Allocate a new node containing `data`.
pub fn alloc_node(&mut self, data: NodeData<F>) -> Node {
debug_assert!(!data.is_free(), "can't allocate free node");
match self.freelist {
Some(node) => {
// Remove this node from the free list.
match self.nodes[node] {
NodeData::Free { next } => self.freelist = next,
_ => panic!("Invalid {} on free list", node),
}
self.nodes[node] = data;
node
}
None => {
// The free list is empty. Allocate a new node.
self.nodes.push(data)
}
}
}
/// Free a node.
pub fn free_node(&mut self, node: Node) {
// Quick check for a double free.
debug_assert!(!self.nodes[node].is_free(), "{} is already free", node);
self.nodes[node] = NodeData::Free { next: self.freelist };
self.freelist = Some(node);
}
/// Free the entire tree rooted at `node`.
pub fn free_tree(&mut self, node: Node) {
if let NodeData::Inner { size, tree, .. } = self[node] {
// Note that we have to capture `tree` by value to avoid borrow checker trouble.
#[cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
for i in 0..usize::from(size + 1) {
// Recursively free sub-trees. This recursion can never be deeper than `MAX_PATH`,
// and since most trees have less than a handful of nodes, it is worthwhile to
// avoid the heap allocation for an iterative tree traversal.
self.free_tree(tree[i]);
}
}
self.free_node(node);
}
}
#[cfg(test)]
impl<F: Forest> NodePool<F> {
/// Verify the consistency of the tree rooted at `node`.
pub fn verify_tree(&self, node: Node, comp: &F::Comparator)
where
NodeData<F>: ::std::fmt::Display,
F::Key: ::std::fmt::Display,
{
use super::Comparator;
use entity::SparseSet;
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::vec::Vec;
// The root node can't be an inner node with just a single sub-tree. It should have been
// pruned.
if let &NodeData::Inner { size, .. } = &self[node] {
assert!(size > 0, "Root must have more than one sub-tree");
}
let mut done = SparseSet::new();
let mut todo = Vec::new();
// Todo-list entries are:
// 1. Optional LHS key which must be <= all node entries.
// 2. The node reference.
// 3. Optional RHS key which must be > all node entries.
todo.push((None, node, None));
while let Some((lkey, node, rkey)) = todo.pop() {
assert_eq!(
done.insert(node),
None,
"Node appears more than once in tree"
);
let mut lower = lkey;
match self[node] {
NodeData::Inner { size, keys, tree } => {
let size = size as usize;
let capacity = tree.len();
let keys = &keys[0..size];
// Verify occupancy.
// Right-most nodes can be small, but others must be at least half full.
assert!(
rkey.is_none() || (size + 1) * 2 >= capacity,
"Only {}/{} entries in {}:{}, upper={}",
size + 1,
capacity,
node,
self[node],
rkey.unwrap()
);
// Queue up the sub-trees, checking for duplicates.
for i in 0..size + 1 {
// Get an upper bound for node[i].
let upper = keys.get(i).cloned().or(rkey);
// Check that keys are strictly monotonic.
if let (Some(a), Some(b)) = (lower, upper) {
assert_eq!(
comp.cmp(a, b),
Ordering::Less,
"Key order {} < {} failed in {}: {}",
a,
b,
node,
self[node]
);
}
// Queue up the sub-tree.
todo.push((lower, tree[i], upper));
// Set a lower bound for the next tree.
lower = upper;
}
}
NodeData::Leaf { size, keys, .. } => {
let size = size as usize;
let capacity = keys.borrow().len();
let keys = &keys.borrow()[0..size];
// Verify occupancy.
// Right-most nodes can be small, but others must be at least half full.
assert!(size > 0, "Leaf {} is empty", node);
assert!(
rkey.is_none() || size * 2 >= capacity,
"Only {}/{} entries in {}:{}, upper={}",
size,
capacity,
node,
self[node],
rkey.unwrap()
);
for i in 0..size + 1 {
let upper = keys.get(i).cloned().or(rkey);
// Check that keys are strictly monotonic.
if let (Some(a), Some(b)) = (lower, upper) {
let wanted = if i == 0 {
Ordering::Equal
} else {
Ordering::Less
};
assert_eq!(
comp.cmp(a, b),
wanted,
"Key order for {} - {} failed in {}: {}",
a,
b,
node,
self[node]
);
}
// Set a lower bound for the next key.
lower = upper;
}
}
NodeData::Free { .. } => panic!("Free {} reached", node),
}
}
}
}
impl<F: Forest> Index<Node> for NodePool<F> {
type Output = NodeData<F>;
fn index(&self, index: Node) -> &Self::Output {
self.nodes.index(index)
}
}
impl<F: Forest> IndexMut<Node> for NodePool<F> {
fn index_mut(&mut self, index: Node) -> &mut Self::Output {
self.nodes.index_mut(index)
}
}

View File

@@ -0,0 +1,593 @@
//! Forest of sets.
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
use packed_option::PackedOption;
use std::marker::PhantomData;
/// Tag type defining forest types for a set.
struct SetTypes<K, C>(PhantomData<(K, C)>);
impl<K, C> Forest for SetTypes<K, C>
where
K: Copy,
C: Comparator<K>,
{
type Key = K;
type Value = SetValue;
type LeafKeys = [K; 2 * INNER_SIZE - 1];
type LeafValues = [SetValue; 2 * INNER_SIZE - 1];
type Comparator = C;
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; 2 * INNER_SIZE - 1]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; 2 * INNER_SIZE - 1]
}
}
/// Memory pool for a forest of `Set` instances.
pub struct SetForest<K, C>
where
K: Copy,
C: Comparator<K>,
{
nodes: NodePool<SetTypes<K, C>>,
}
impl<K, C> SetForest<K, C>
where
K: Copy,
C: Comparator<K>,
{
/// Create a new empty forest.
pub fn new() -> SetForest<K, C> {
SetForest { nodes: NodePool::new() }
}
/// Clear all sets in the forest.
///
/// All `Set` instances belong to this forest are invalidated and should no longer be used.
pub fn clear(&mut self) {
self.nodes.clear();
}
}
/// B-tree representing an ordered set of `K`s using `C` for comparing elements.
///
/// This is not a general-purpose replacement for `BTreeSet`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Sets can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a set does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Set<K, C>
where
K: Copy,
C: Comparator<K>,
{
root: PackedOption<Node>,
unused: PhantomData<(K, C)>,
}
impl<K, C> Set<K, C>
where
K: Copy,
C: Comparator<K>,
{
/// Make an empty set.
pub fn new() -> Set<K, C> {
Set {
root: None.into(),
unused: PhantomData,
}
}
/// Is this an empty set?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Does the set contain `key`?.
pub fn contains(&self, key: K, forest: &SetForest<K, C>, comp: &C) -> bool {
self.root
.expand()
.and_then(|root| Path::default().find(key, root, &forest.nodes, comp))
.is_some()
}
/// Try to insert `key` into the set.
///
/// If the set did not contain `key`, insert it and return true.
///
/// If `key` is already present, don't change the set and return false.
pub fn insert(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
self.cursor(forest, comp).insert(key)
}
/// Remove `key` from the set and return true.
///
/// If `key` was not present in the set, return false.
pub fn remove(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
let mut c = self.cursor(forest, comp);
if c.goto(key) {
c.remove();
true
} else {
false
}
}
/// Remove all entries.
pub fn clear(&mut self, forest: &mut SetForest<K, C>) {
if let Some(root) = self.root.take() {
forest.nodes.free_tree(root);
}
}
/// Retains only the elements specified by the predicate.
///
/// Remove all elements where the predicate returns false.
pub fn retain<F>(&mut self, forest: &mut SetForest<K, C>, mut predicate: F)
where
F: FnMut(K) -> bool,
{
let mut path = Path::default();
if let Some(root) = self.root.expand() {
path.first(root, &forest.nodes);
}
while let Some((node, entry)) = path.leaf_pos() {
if predicate(forest.nodes[node].unwrap_leaf().0[entry]) {
path.next(&forest.nodes);
} else {
self.root = path.remove(&mut forest.nodes).into();
}
}
}
/// Create a cursor for navigating this set. The cursor is initially positioned off the end of
/// the set.
pub fn cursor<'a>(
&'a mut self,
forest: &'a mut SetForest<K, C>,
comp: &'a C,
) -> SetCursor<'a, K, C> {
SetCursor::new(self, forest, comp)
}
/// Create an iterator traversing this set. The iterator type is `K`.
pub fn iter<'a>(&'a self, forest: &'a SetForest<K, C>) -> SetIter<'a, K, C> {
SetIter {
root: self.root,
pool: &forest.nodes,
path: Path::default(),
}
}
}
impl<K, C> Default for Set<K, C>
where
K: Copy,
C: Comparator<K>,
{
fn default() -> Self {
Self::new()
}
}
/// A position in a `Set` used to navigate and modify the ordered set.
///
/// A cursor always points at an element in the set, or "off the end" which is a position after the
/// last element in the set.
pub struct SetCursor<'a, K, C>
where
K: 'a + Copy,
C: 'a + Comparator<K>,
{
root: &'a mut PackedOption<Node>,
pool: &'a mut NodePool<SetTypes<K, C>>,
comp: &'a C,
path: Path<SetTypes<K, C>>,
}
impl<'a, K, C> SetCursor<'a, K, C>
where
K: Copy,
C: Comparator<K>,
{
/// Create a cursor with a default (invalid) location.
fn new(
container: &'a mut Set<K, C>,
forest: &'a mut SetForest<K, C>,
comp: &'a C,
) -> SetCursor<'a, K, C> {
SetCursor {
root: &mut container.root,
pool: &mut forest.nodes,
comp,
path: Path::default(),
}
}
/// Is this cursor pointing to an empty set?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Move cursor to the next element and return it.
///
/// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
/// position.
pub fn next(&mut self) -> Option<K> {
self.path.next(self.pool).map(|(k, _)| k)
}
/// Move cursor to the previous element and return it.
///
/// If the cursor is already pointing at the first element, leave it there and return `None`.
pub fn prev(&mut self) -> Option<K> {
self.root.expand().and_then(|root| {
self.path.prev(root, self.pool).map(|(k, _)| k)
})
}
/// Get the current element, or `None` if the cursor is at the end.
pub fn elem(&self) -> Option<K> {
self.path.leaf_pos().and_then(|(node, entry)| {
self.pool[node].unwrap_leaf().0.get(entry).cloned()
})
}
/// Move this cursor to `elem`.
///
/// If `elem` is in the set, place the cursor at `elem` and return true.
///
/// If `elem` is not in the set, place the cursor at the next larger element (or the end) and
/// return false.
pub fn goto(&mut self, elem: K) -> bool {
match self.root.expand() {
None => false,
Some(root) => {
if self.path.find(elem, root, self.pool, self.comp).is_some() {
true
} else {
self.path.normalize(self.pool);
false
}
}
}
}
/// Move this cursor to the first element.
pub fn goto_first(&mut self) -> Option<K> {
self.root.map(|root| self.path.first(root, self.pool).0)
}
/// Try to insert `elem` into the set and leave the cursor at the inserted element.
///
/// If the set did not contain `elem`, insert it and return true.
///
/// If `elem` is already present, don't change the set, place the cursor at `goto(elem)`, and
/// return false.
pub fn insert(&mut self, elem: K) -> bool {
match self.root.expand() {
None => {
let root = self.pool.alloc_node(NodeData::leaf(elem, SetValue()));
*self.root = root.into();
self.path.set_root_node(root);
true
}
Some(root) => {
// TODO: Optimize the case where `self.path` is already at the correct insert pos.
if self.path.find(elem, root, self.pool, self.comp).is_none() {
*self.root = self.path.insert(elem, SetValue(), self.pool).into();
true
} else {
false
}
}
}
}
/// Remove the current element (if any) and return it.
/// This advances the cursor to the next element after the removed one.
pub fn remove(&mut self) -> Option<K> {
let elem = self.elem();
if elem.is_some() {
*self.root = self.path.remove(self.pool).into();
}
elem
}
}
#[cfg(test)]
impl<'a, K, C> SetCursor<'a, K, C>
where
K: Copy + ::std::fmt::Display,
C: Comparator<K>,
{
fn verify(&self) {
self.path.verify(self.pool);
self.root.map(|root| self.pool.verify_tree(root, self.comp));
}
/// Get a text version of the path to the current position.
fn tpath(&self) -> String {
self.path.to_string()
}
}
/// An iterator visiting the elements of a `Set`.
pub struct SetIter<'a, K, C>
where
K: 'a + Copy,
C: 'a + Comparator<K>,
{
root: PackedOption<Node>,
pool: &'a NodePool<SetTypes<K, C>>,
path: Path<SetTypes<K, C>>,
}
impl<'a, K, C> Iterator for SetIter<'a, K, C>
where
K: 'a + Copy,
C: 'a + Comparator<K>,
{
type Item = K;
fn next(&mut self) -> Option<Self::Item> {
// We use `self.root` to indicate if we need to go to the first element. Reset to `None`
// once we've returned the first element. This also works for an empty tree since the
// `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
match self.root.take() {
Some(root) => Some(self.path.first(root, self.pool).0),
None => self.path.next(self.pool).map(|(k, _)| k),
}
}
}
#[cfg(test)]
mod test {
use super::super::NodeData;
use super::*;
use std::mem;
use std::vec::Vec;
#[test]
fn node_size() {
// check that nodes are cache line sized when keys are 32 bits.
type F = SetTypes<u32, ()>;
assert_eq!(mem::size_of::<NodeData<F>>(), 64);
}
#[test]
fn empty() {
let mut f = SetForest::<u32, ()>::new();
f.clear();
let mut s = Set::<u32, ()>::new();
assert!(s.is_empty());
s.clear(&mut f);
assert!(!s.contains(7, &f, &()));
// Iterator for an empty set.
assert_eq!(s.iter(&f).next(), None);
s.retain(&mut f, |_| unreachable!());
let mut c = SetCursor::new(&mut s, &mut f, &());
c.verify();
assert_eq!(c.elem(), None);
assert_eq!(c.goto_first(), None);
assert_eq!(c.tpath(), "<empty path>");
}
#[test]
fn simple_cursor() {
let mut f = SetForest::<u32, ()>::new();
let mut s = Set::<u32, ()>::new();
let mut c = SetCursor::new(&mut s, &mut f, &());
assert!(c.insert(50));
c.verify();
assert_eq!(c.elem(), Some(50));
assert!(c.insert(100));
c.verify();
assert_eq!(c.elem(), Some(100));
assert!(c.insert(10));
c.verify();
assert_eq!(c.elem(), Some(10));
// Basic movement.
assert_eq!(c.next(), Some(50));
assert_eq!(c.next(), Some(100));
assert_eq!(c.next(), None);
assert_eq!(c.next(), None);
assert_eq!(c.prev(), Some(100));
assert_eq!(c.prev(), Some(50));
assert_eq!(c.prev(), Some(10));
assert_eq!(c.prev(), None);
assert_eq!(c.prev(), None);
assert!(c.goto(50));
assert_eq!(c.elem(), Some(50));
assert_eq!(c.remove(), Some(50));
c.verify();
assert_eq!(c.elem(), Some(100));
assert_eq!(c.remove(), Some(100));
c.verify();
assert_eq!(c.elem(), None);
assert_eq!(c.remove(), None);
c.verify();
}
#[test]
fn two_level_sparse_tree() {
let mut f = SetForest::<u32, ()>::new();
let mut s = Set::<u32, ()>::new();
let mut c = SetCursor::new(&mut s, &mut f, &());
// Insert enough elements that we get a two-level tree.
// Each leaf node holds 8 elements
assert!(c.is_empty());
for i in 0..50 {
assert!(c.insert(i));
assert_eq!(c.elem(), Some(i));
}
assert!(!c.is_empty());
assert_eq!(c.goto_first(), Some(0));
assert_eq!(c.tpath(), "node2[0]--node0[0]");
assert_eq!(c.prev(), None);
for i in 1..50 {
assert_eq!(c.next(), Some(i));
}
assert_eq!(c.next(), None);
for i in (0..50).rev() {
assert_eq!(c.prev(), Some(i));
}
assert_eq!(c.prev(), None);
assert!(c.goto(25));
for i in 25..50 {
assert_eq!(c.remove(), Some(i));
assert!(!c.is_empty());
c.verify();
}
for i in (0..25).rev() {
assert!(!c.is_empty());
assert_eq!(c.elem(), None);
assert_eq!(c.prev(), Some(i));
assert_eq!(c.remove(), Some(i));
c.verify();
}
assert_eq!(c.elem(), None);
assert!(c.is_empty());
}
#[test]
fn three_level_sparse_tree() {
let mut f = SetForest::<u32, ()>::new();
let mut s = Set::<u32, ()>::new();
let mut c = SetCursor::new(&mut s, &mut f, &());
// Insert enough elements that we get a 3-level tree.
// Each leaf node holds 8 elements when filled up sequentially.
// Inner nodes hold 8 node pointers.
assert!(c.is_empty());
for i in 0..150 {
assert!(c.insert(i));
assert_eq!(c.elem(), Some(i));
}
assert!(!c.is_empty());
assert!(c.goto(0));
assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]");
assert_eq!(c.prev(), None);
for i in 1..150 {
assert_eq!(c.next(), Some(i));
}
assert_eq!(c.next(), None);
for i in (0..150).rev() {
assert_eq!(c.prev(), Some(i));
}
assert_eq!(c.prev(), None);
assert!(c.goto(125));
for i in 125..150 {
assert_eq!(c.remove(), Some(i));
assert!(!c.is_empty());
c.verify();
}
for i in (0..125).rev() {
assert!(!c.is_empty());
assert_eq!(c.elem(), None);
assert_eq!(c.prev(), Some(i));
assert_eq!(c.remove(), Some(i));
c.verify();
}
assert_eq!(c.elem(), None);
assert!(c.is_empty());
}
// Generate a densely populated 4-level tree.
//
// Level 1: 1 root
// Level 2: 8 inner
// Level 3: 64 inner
// Level 4: 512 leafs, up to 7680 elements
//
// A 3-level tree can hold at most 960 elements.
fn dense4l(f: &mut SetForest<i32, ()>) -> Set<i32, ()> {
f.clear();
let mut s = Set::new();
// Insert 400 elements in 7 passes over the range to avoid the half-full leaf node pattern
// that comes from sequential insertion. This will generate a normal leaf layer.
for n in 0..4000 {
assert!(s.insert((n * 7) % 4000, f, &()));
}
s
}
#[test]
fn four_level() {
let mut f = SetForest::<i32, ()>::new();
let mut s = dense4l(&mut f);
assert_eq!(
s.iter(&f).collect::<Vec<_>>()[0..10],
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
);
let mut c = s.cursor(&mut f, &());
c.verify();
// Peel off a whole sub-tree of the root by deleting from the front.
// The 900 element is near the front of the second sub-tree.
assert!(c.goto(900));
assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]");
assert!(c.goto(0));
for i in 0..900 {
assert!(!c.is_empty());
assert_eq!(c.remove(), Some(i));
}
c.verify();
assert_eq!(c.elem(), Some(900));
// Delete backwards from somewhere in the middle.
assert!(c.goto(3000));
for i in (2000..3000).rev() {
assert_eq!(c.prev(), Some(i));
assert_eq!(c.remove(), Some(i));
assert_eq!(c.elem(), Some(3000));
}
c.verify();
// Remove everything in a scattered manner, triggering many collapsing patterns.
for i in 0..4000 {
if c.goto((i * 7) % 4000) {
c.remove();
}
}
assert!(c.is_empty());
}
#[test]
fn four_level_clear() {
let mut f = SetForest::<i32, ()>::new();
let mut s = dense4l(&mut f);
s.clear(&mut f);
}
}