moved crates in lib/ to src/, renamed crates, modified some files' text (#660)

moved crates in lib/ to src/, renamed crates, modified some files' text (#660)
This commit is contained in:
lazypassion
2019-01-28 18:56:54 -05:00
committed by Dan Gohman
parent 54959cf5bb
commit 747ad3c4c5
508 changed files with 94 additions and 92 deletions

View File

@@ -0,0 +1,24 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-bforest"
version = "0.28.0"
description = "A forest of B+-trees"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://cranelift.readthedocs.io/"
repository = "https://github.com/CraneStation/cranelift"
categories = ["no-std"]
readme = "README.md"
keywords = ["btree", "forest", "set", "map"]
edition = "2018"
[dependencies]
cranelift-entity = { path = "../cranelift-entity", version = "0.28.0", default-features = false }
[features]
default = ["std"]
std = ["cranelift-entity/std"]
core = []
[badges]
maintenance = { status = "experimental" }
travis-ci = { repository = "CraneStation/cranelift" }

220
cranelift/bforest/LICENSE Normal file
View File

@@ -0,0 +1,220 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--- LLVM Exceptions to the Apache 2.0 License ----
As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into an Object form of such source code, you
may redistribute such embedded portions in such Object form without complying
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
In addition, if you combine or link compiled forms of this Software with
software that is licensed under the GPLv2 ("Combined Software") and if a
court of competent jurisdiction determines that the patent provision (Section
3), the indemnity provision (Section 9) or other Section of the License
conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.

View File

@@ -0,0 +1,12 @@
This crate contains array-based data structures used by the core Cranelift code
generator which represent a set of small ordered sets or maps.
**These are not general purpose data structures that are somehow magically faster that the
standard library's `BTreeSet` and `BTreeMap` types.**
The tradeoffs are different:
- Keys and values are expected to be small and copyable. We optimize for 32-bit types.
- A comparator object is used to compare keys, allowing smaller "context free" keys.
- Empty trees have a very small 32-bit footprint.
- All the trees in a forest can be cleared in constant time.

View File

@@ -0,0 +1,207 @@
//! A forest of B+-trees.
//!
//! This crate provides a data structures representing a set of small ordered sets or maps.
//! It is implemented as a forest of B+-trees all allocating nodes out of the same pool.
//!
//! **These are not general purpose data structures that are somehow magically faster that the
//! standard library's `BTreeSet` and `BTreeMap` types.**
//!
//! The tradeoffs are different:
//!
//! - Keys and values are expected to be small and copyable. We optimize for 32-bit types.
//! - A comparator object is used to compare keys, allowing smaller "context free" keys.
//! - Empty trees have a very small 32-bit footprint.
//! - All the trees in a forest can be cleared in constant time.
#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
#![warn(unused_import_braces)]
#![cfg_attr(feature = "std", warn(unstable_features))]
#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))]
#![cfg_attr(
feature = "cargo-clippy",
warn(
clippy::float_arithmetic,
clippy::mut_mut,
clippy::nonminimal_bool,
clippy::option_map_unwrap_or,
clippy::option_map_unwrap_or_else,
clippy::print_stdout,
clippy::unicode_not_nfc,
clippy::use_self
)
)]
#![no_std]
#![cfg_attr(not(feature = "std"), feature(alloc))]
#[cfg(test)]
#[cfg(not(feature = "std"))]
#[macro_use]
extern crate alloc as std;
#[cfg(test)]
#[cfg(feature = "std")]
#[macro_use]
extern crate std;
#[macro_use]
extern crate cranelift_entity as entity;
use crate::entity::packed_option;
use core::borrow::BorrowMut;
use core::cmp::Ordering;
mod map;
mod node;
mod path;
mod pool;
mod set;
pub use self::map::{Map, MapCursor, MapForest, MapIter};
pub use self::set::{Set, SetCursor, SetForest, SetIter};
use self::node::NodeData;
use self::path::Path;
use self::pool::NodePool;
/// The maximum branching factor of an inner node in a B+-tree.
/// The minimum number of outgoing edges is `INNER_SIZE/2`.
const INNER_SIZE: usize = 8;
/// Given the worst case branching factor of `INNER_SIZE/2` = 4, this is the
/// worst case path length from the root node to a leaf node in a tree with 2^32
/// entries. We would run out of node references before we hit `MAX_PATH`.
const MAX_PATH: usize = 16;
/// Key comparator.
///
/// Keys don't need to implement `Ord`. They are compared using a comparator object which
/// provides a context for comparison.
pub trait Comparator<K>
where
K: Copy,
{
/// Compare keys `a` and `b`.
///
/// This relation must provide a total ordering or the key space.
fn cmp(&self, a: K, b: K) -> Ordering;
/// Binary search for `k` in an ordered slice.
///
/// Assume that `s` is already sorted according to this ordering, search for the key `k`.
///
/// Returns `Ok(idx)` if `k` was found in the slice or `Err(idx)` with the position where it
/// should be inserted to preserve the ordering.
fn search(&self, k: K, s: &[K]) -> Result<usize, usize> {
s.binary_search_by(|x| self.cmp(*x, k))
}
}
/// Trivial comparator that doesn't actually provide any context.
impl<K> Comparator<K> for ()
where
K: Copy + Ord,
{
fn cmp(&self, a: K, b: K) -> Ordering {
a.cmp(&b)
}
}
/// Family of types shared by the map and set forest implementations.
trait Forest {
/// The key type is present for both sets and maps.
type Key: Copy;
/// The value type is `()` for sets.
type Value: Copy;
/// An array of keys for the leaf nodes.
type LeafKeys: Copy + BorrowMut<[Self::Key]>;
/// An array of values for the leaf nodes.
type LeafValues: Copy + BorrowMut<[Self::Value]>;
/// Splat a single key into a whole array.
fn splat_key(key: Self::Key) -> Self::LeafKeys;
/// Splat a single value inst a whole array
fn splat_value(value: Self::Value) -> Self::LeafValues;
}
/// A reference to a B+-tree node.
#[derive(Clone, Copy, PartialEq, Eq)]
struct Node(u32);
entity_impl!(Node, "node");
/// Empty type to be used as the "value" in B-trees representing sets.
#[derive(Clone, Copy)]
struct SetValue();
/// Insert `x` into `s` at position `i`, pushing out the last element.
fn slice_insert<T: Copy>(s: &mut [T], i: usize, x: T) {
for j in (i + 1..s.len()).rev() {
s[j] = s[j - 1];
}
s[i] = x;
}
/// Shift elements in `s` to the left by `n` positions.
fn slice_shift<T: Copy>(s: &mut [T], n: usize) {
for j in 0..s.len() - n {
s[j] = s[j + n];
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::entity::EntityRef;
/// An opaque reference to an extended basic block in a function.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Ebb(u32);
entity_impl!(Ebb, "ebb");
#[test]
fn comparator() {
let ebb1 = Ebb::new(1);
let ebb2 = Ebb::new(2);
let ebb3 = Ebb::new(3);
let ebb4 = Ebb::new(4);
let vals = [ebb1, ebb2, ebb4];
let comp = ();
assert_eq!(comp.search(ebb1, &vals), Ok(0));
assert_eq!(comp.search(ebb3, &vals), Err(2));
assert_eq!(comp.search(ebb4, &vals), Ok(2));
}
#[test]
fn slice_insertion() {
let mut a = ['a', 'b', 'c', 'd'];
slice_insert(&mut a[0..1], 0, 'e');
assert_eq!(a, ['e', 'b', 'c', 'd']);
slice_insert(&mut a, 0, 'a');
assert_eq!(a, ['a', 'e', 'b', 'c']);
slice_insert(&mut a, 3, 'g');
assert_eq!(a, ['a', 'e', 'b', 'g']);
slice_insert(&mut a, 1, 'h');
assert_eq!(a, ['a', 'h', 'e', 'b']);
}
#[test]
fn slice_shifting() {
let mut a = ['a', 'b', 'c', 'd'];
slice_shift(&mut a[0..1], 1);
assert_eq!(a, ['a', 'b', 'c', 'd']);
slice_shift(&mut a[1..], 1);
assert_eq!(a, ['a', 'c', 'd', 'd']);
slice_shift(&mut a, 2);
assert_eq!(a, ['d', 'd', 'd', 'd']);
}
}

View File

@@ -0,0 +1,923 @@
//! Forest of maps.
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, INNER_SIZE};
use crate::packed_option::PackedOption;
#[cfg(test)]
use core::fmt;
use core::marker::PhantomData;
#[cfg(test)]
use std::string::String;
/// Tag type defining forest types for a map.
struct MapTypes<K, V>(PhantomData<(K, V)>);
impl<K, V> Forest for MapTypes<K, V>
where
K: Copy,
V: Copy,
{
type Key = K;
type Value = V;
type LeafKeys = [K; INNER_SIZE - 1];
type LeafValues = [V; INNER_SIZE - 1];
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; INNER_SIZE - 1]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; INNER_SIZE - 1]
}
}
/// Memory pool for a forest of `Map` instances.
pub struct MapForest<K, V>
where
K: Copy,
V: Copy,
{
nodes: NodePool<MapTypes<K, V>>,
}
impl<K, V> MapForest<K, V>
where
K: Copy,
V: Copy,
{
/// Create a new empty forest.
pub fn new() -> Self {
Self {
nodes: NodePool::new(),
}
}
/// Clear all maps in the forest.
///
/// All `Map` instances belong to this forest are invalidated and should no longer be used.
pub fn clear(&mut self) {
self.nodes.clear();
}
}
/// B-tree mapping from `K` to `V`.
///
/// This is not a general-purpose replacement for `BTreeMap`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Maps can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a map does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Map<K, V>
where
K: Copy,
V: Copy,
{
root: PackedOption<Node>,
unused: PhantomData<(K, V)>,
}
impl<K, V> Map<K, V>
where
K: Copy,
V: Copy,
{
/// Make an empty map.
pub fn new() -> Self {
Self {
root: None.into(),
unused: PhantomData,
}
}
/// Is this an empty map?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Get the value stored for `key`.
pub fn get<C: Comparator<K>>(&self, key: K, forest: &MapForest<K, V>, comp: &C) -> Option<V> {
self.root
.expand()
.and_then(|root| Path::default().find(key, root, &forest.nodes, comp))
}
/// Look up the value stored for `key`.
///
/// If it exists, return the stored key-value pair.
///
/// Otherwise, return the last key-value pair with a key that is less than or equal to `key`.
///
/// If no stored keys are less than or equal to `key`, return `None`.
pub fn get_or_less<C: Comparator<K>>(
&self,
key: K,
forest: &MapForest<K, V>,
comp: &C,
) -> Option<(K, V)> {
self.root.expand().and_then(|root| {
let mut path = Path::default();
match path.find(key, root, &forest.nodes, comp) {
Some(v) => Some((key, v)),
None => path.prev(root, &forest.nodes),
}
})
}
/// Insert `key, value` into the map and return the old value stored for `key`, if any.
pub fn insert<C: Comparator<K>>(
&mut self,
key: K,
value: V,
forest: &mut MapForest<K, V>,
comp: &C,
) -> Option<V> {
self.cursor(forest, comp).insert(key, value)
}
/// Remove `key` from the map and return the removed value for `key`, if any.
pub fn remove<C: Comparator<K>>(
&mut self,
key: K,
forest: &mut MapForest<K, V>,
comp: &C,
) -> Option<V> {
let mut c = self.cursor(forest, comp);
if c.goto(key).is_some() {
c.remove()
} else {
None
}
}
/// Remove all entries.
pub fn clear(&mut self, forest: &mut MapForest<K, V>) {
if let Some(root) = self.root.take() {
forest.nodes.free_tree(root);
}
}
/// Retains only the elements specified by the predicate.
///
/// Remove all key-value pairs where the predicate returns false.
///
/// The predicate is allowed to update the values stored in the map.
pub fn retain<F>(&mut self, forest: &mut MapForest<K, V>, mut predicate: F)
where
F: FnMut(K, &mut V) -> bool,
{
let mut path = Path::default();
if let Some(root) = self.root.expand() {
path.first(root, &forest.nodes);
}
while let Some((node, entry)) = path.leaf_pos() {
let keep = {
let (ks, vs) = forest.nodes[node].unwrap_leaf_mut();
predicate(ks[entry], &mut vs[entry])
};
if keep {
path.next(&forest.nodes);
} else {
self.root = path.remove(&mut forest.nodes).into();
}
}
}
/// Create a cursor for navigating this map. The cursor is initially positioned off the end of
/// the map.
pub fn cursor<'a, C: Comparator<K>>(
&'a mut self,
forest: &'a mut MapForest<K, V>,
comp: &'a C,
) -> MapCursor<'a, K, V, C> {
MapCursor::new(self, forest, comp)
}
/// Create an iterator traversing this map. The iterator type is `(K, V)`.
pub fn iter<'a>(&'a self, forest: &'a MapForest<K, V>) -> MapIter<'a, K, V> {
MapIter {
root: self.root,
pool: &forest.nodes,
path: Path::default(),
}
}
}
impl<K, V> Default for Map<K, V>
where
K: Copy,
V: Copy,
{
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
impl<K, V> Map<K, V>
where
K: Copy + fmt::Display,
V: Copy,
{
/// Verify consistency.
fn verify<C: Comparator<K>>(&self, forest: &MapForest<K, V>, comp: &C)
where
NodeData<MapTypes<K, V>>: fmt::Display,
{
if let Some(root) = self.root.expand() {
forest.nodes.verify_tree(root, comp);
}
}
/// Get a text version of the path to `key`.
fn tpath<C: Comparator<K>>(&self, key: K, forest: &MapForest<K, V>, comp: &C) -> String {
use std::string::ToString;
match self.root.expand() {
None => "map(empty)".to_string(),
Some(root) => {
let mut path = Path::default();
path.find(key, root, &forest.nodes, comp);
path.to_string()
}
}
}
}
/// A position in a `Map` used to navigate and modify the ordered map.
///
/// A cursor always points at a key-value pair in the map, or "off the end" which is a position
/// after the last entry in the map.
pub struct MapCursor<'a, K, V, C>
where
K: 'a + Copy,
V: 'a + Copy,
C: 'a + Comparator<K>,
{
root: &'a mut PackedOption<Node>,
pool: &'a mut NodePool<MapTypes<K, V>>,
comp: &'a C,
path: Path<MapTypes<K, V>>,
}
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
K: Copy,
V: Copy,
C: Comparator<K>,
{
/// Create a cursor with a default (off-the-end) location.
fn new(container: &'a mut Map<K, V>, forest: &'a mut MapForest<K, V>, comp: &'a C) -> Self {
Self {
root: &mut container.root,
pool: &mut forest.nodes,
comp,
path: Path::default(),
}
}
/// Is this cursor pointing to an empty map?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Move cursor to the next key-value pair and return it.
///
/// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
/// position.
#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
pub fn next(&mut self) -> Option<(K, V)> {
self.path.next(self.pool)
}
/// Move cursor to the previous key-value pair and return it.
///
/// If the cursor is already pointing at the first entry, leave it there and return `None`.
pub fn prev(&mut self) -> Option<(K, V)> {
self.root
.expand()
.and_then(|root| self.path.prev(root, self.pool))
}
/// Get the current key, or `None` if the cursor is at the end.
pub fn key(&self) -> Option<K> {
self.path
.leaf_pos()
.and_then(|(node, entry)| self.pool[node].unwrap_leaf().0.get(entry).cloned())
}
/// Get the current value, or `None` if the cursor is at the end.
pub fn value(&self) -> Option<V> {
self.path
.leaf_pos()
.and_then(|(node, entry)| self.pool[node].unwrap_leaf().1.get(entry).cloned())
}
/// Get a mutable reference to the current value, or `None` if the cursor is at the end.
pub fn value_mut(&mut self) -> Option<&mut V> {
self.path
.leaf_pos()
.and_then(move |(node, entry)| self.pool[node].unwrap_leaf_mut().1.get_mut(entry))
}
/// Move this cursor to `key`.
///
/// If `key` is in the map, place the cursor at `key` and return the corresponding value.
///
/// If `key` is not in the set, place the cursor at the next larger element (or the end) and
/// return `None`.
pub fn goto(&mut self, elem: K) -> Option<V> {
self.root.expand().and_then(|root| {
let v = self.path.find(elem, root, self.pool, self.comp);
if v.is_none() {
self.path.normalize(self.pool);
}
v
})
}
/// Move this cursor to the first element.
pub fn goto_first(&mut self) -> Option<V> {
self.root.map(|root| self.path.first(root, self.pool).1)
}
/// Insert `(key, value))` into the map and leave the cursor at the inserted pair.
///
/// If the map did not contain `key`, return `None`.
///
/// If `key` is already present, replace the existing with `value` and return the old value.
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
match self.root.expand() {
None => {
let root = self.pool.alloc_node(NodeData::leaf(key, value));
*self.root = root.into();
self.path.set_root_node(root);
None
}
Some(root) => {
// TODO: Optimize the case where `self.path` is already at the correct insert pos.
let old = self.path.find(key, root, self.pool, self.comp);
if old.is_some() {
*self.path.value_mut(self.pool) = value;
} else {
*self.root = self.path.insert(key, value, self.pool).into();
}
old
}
}
}
/// Remove the current entry (if any) and return the mapped value.
/// This advances the cursor to the next entry after the removed one.
pub fn remove(&mut self) -> Option<V> {
let value = self.value();
if value.is_some() {
*self.root = self.path.remove(self.pool).into();
}
value
}
}
/// An iterator visiting the key-value pairs of a `Map`.
pub struct MapIter<'a, K, V>
where
K: 'a + Copy,
V: 'a + Copy,
{
root: PackedOption<Node>,
pool: &'a NodePool<MapTypes<K, V>>,
path: Path<MapTypes<K, V>>,
}
impl<'a, K, V> Iterator for MapIter<'a, K, V>
where
K: 'a + Copy,
V: 'a + Copy,
{
type Item = (K, V);
fn next(&mut self) -> Option<Self::Item> {
// We use `self.root` to indicate if we need to go to the first element. Reset to `None`
// once we've returned the first element. This also works for an empty tree since the
// `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
match self.root.take() {
Some(root) => Some(self.path.first(root, self.pool)),
None => self.path.next(self.pool),
}
}
}
#[cfg(test)]
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
K: Copy + fmt::Display,
V: Copy + fmt::Display,
C: Comparator<K>,
{
fn verify(&self) {
self.path.verify(self.pool);
self.root.map(|root| self.pool.verify_tree(root, self.comp));
}
/// Get a text version of the path to the current position.
fn tpath(&self) -> String {
use std::string::ToString;
self.path.to_string()
}
}
#[cfg(test)]
mod tests {
use super::super::NodeData;
use super::*;
use core::mem;
use std::vec::Vec;
#[test]
fn node_size() {
// check that nodes are cache line sized when keys and values are 32 bits.
type F = MapTypes<u32, u32>;
assert_eq!(mem::size_of::<NodeData<F>>(), 64);
}
#[test]
fn empty() {
let mut f = MapForest::<u32, f32>::new();
f.clear();
let mut m = Map::<u32, f32>::new();
assert!(m.is_empty());
m.clear(&mut f);
assert_eq!(m.get(7, &f, &()), None);
assert_eq!(m.iter(&f).next(), None);
assert_eq!(m.get_or_less(7, &f, &()), None);
m.retain(&mut f, |_, _| unreachable!());
let mut c = m.cursor(&mut f, &());
assert!(c.is_empty());
assert_eq!(c.key(), None);
assert_eq!(c.value(), None);
assert_eq!(c.next(), None);
assert_eq!(c.prev(), None);
c.verify();
assert_eq!(c.tpath(), "<empty path>");
assert_eq!(c.goto_first(), None);
assert_eq!(c.tpath(), "<empty path>");
}
#[test]
fn inserting() {
let f = &mut MapForest::<u32, f32>::new();
let mut m = Map::<u32, f32>::new();
// The first seven values stay in a single leaf node.
assert_eq!(m.insert(50, 5.0, f, &()), None);
assert_eq!(m.insert(50, 5.5, f, &()), Some(5.0));
assert_eq!(m.insert(20, 2.0, f, &()), None);
assert_eq!(m.insert(80, 8.0, f, &()), None);
assert_eq!(m.insert(40, 4.0, f, &()), None);
assert_eq!(m.insert(60, 6.0, f, &()), None);
assert_eq!(m.insert(90, 9.0, f, &()), None);
assert_eq!(m.insert(200, 20.0, f, &()), None);
m.verify(f, &());
assert_eq!(
m.iter(f).collect::<Vec<_>>(),
[
(20, 2.0),
(40, 4.0),
(50, 5.5),
(60, 6.0),
(80, 8.0),
(90, 9.0),
(200, 20.0),
]
);
assert_eq!(m.get(0, f, &()), None);
assert_eq!(m.get(20, f, &()), Some(2.0));
assert_eq!(m.get(30, f, &()), None);
assert_eq!(m.get(40, f, &()), Some(4.0));
assert_eq!(m.get(50, f, &()), Some(5.5));
assert_eq!(m.get(60, f, &()), Some(6.0));
assert_eq!(m.get(70, f, &()), None);
assert_eq!(m.get(80, f, &()), Some(8.0));
assert_eq!(m.get(100, f, &()), None);
assert_eq!(m.get_or_less(0, f, &()), None);
assert_eq!(m.get_or_less(20, f, &()), Some((20, 2.0)));
assert_eq!(m.get_or_less(30, f, &()), Some((20, 2.0)));
assert_eq!(m.get_or_less(40, f, &()), Some((40, 4.0)));
assert_eq!(m.get_or_less(200, f, &()), Some((200, 20.0)));
assert_eq!(m.get_or_less(201, f, &()), Some((200, 20.0)));
{
let mut c = m.cursor(f, &());
assert_eq!(c.prev(), Some((200, 20.0)));
assert_eq!(c.prev(), Some((90, 9.0)));
assert_eq!(c.prev(), Some((80, 8.0)));
assert_eq!(c.prev(), Some((60, 6.0)));
assert_eq!(c.prev(), Some((50, 5.5)));
assert_eq!(c.prev(), Some((40, 4.0)));
assert_eq!(c.prev(), Some((20, 2.0)));
assert_eq!(c.prev(), None);
}
// Test some removals where the node stays healthy.
assert_eq!(m.tpath(50, f, &()), "node0[2]");
assert_eq!(m.tpath(80, f, &()), "node0[4]");
assert_eq!(m.tpath(200, f, &()), "node0[6]");
assert_eq!(m.remove(80, f, &()), Some(8.0));
assert_eq!(m.tpath(50, f, &()), "node0[2]");
assert_eq!(m.tpath(80, f, &()), "node0[4]");
assert_eq!(m.tpath(200, f, &()), "node0[5]");
assert_eq!(m.remove(80, f, &()), None);
m.verify(f, &());
assert_eq!(m.remove(20, f, &()), Some(2.0));
assert_eq!(m.tpath(50, f, &()), "node0[1]");
assert_eq!(m.tpath(80, f, &()), "node0[3]");
assert_eq!(m.tpath(200, f, &()), "node0[4]");
assert_eq!(m.remove(20, f, &()), None);
m.verify(f, &());
// [ 40 50 60 90 200 ]
{
let mut c = m.cursor(f, &());
assert_eq!(c.goto_first(), Some(4.0));
assert_eq!(c.key(), Some(40));
assert_eq!(c.value(), Some(4.0));
assert_eq!(c.next(), Some((50, 5.5)));
assert_eq!(c.next(), Some((60, 6.0)));
assert_eq!(c.next(), Some((90, 9.0)));
assert_eq!(c.next(), Some((200, 20.0)));
c.verify();
assert_eq!(c.next(), None);
c.verify();
}
// Removals from the root leaf node beyond underflow.
assert_eq!(m.remove(200, f, &()), Some(20.0));
assert_eq!(m.remove(40, f, &()), Some(4.0));
assert_eq!(m.remove(60, f, &()), Some(6.0));
m.verify(f, &());
assert_eq!(m.remove(50, f, &()), Some(5.5));
m.verify(f, &());
assert_eq!(m.remove(90, f, &()), Some(9.0));
m.verify(f, &());
assert!(m.is_empty());
}
#[test]
fn split_level0_leaf() {
// Various ways of splitting a full leaf node at level 0.
let f = &mut MapForest::<u32, f32>::new();
fn full_leaf(f: &mut MapForest<u32, f32>) -> Map<u32, f32> {
let mut m = Map::new();
for n in 1..8 {
m.insert(n * 10, n as f32 * 1.1, f, &());
}
m
}
// Insert at front of leaf.
let mut m = full_leaf(f);
m.insert(5, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(5, f, &()), Some(4.2));
// Retain even entries, with altered values.
m.retain(f, |k, v| {
*v = (k / 10) as f32;
(k % 20) == 0
});
assert_eq!(
m.iter(f).collect::<Vec<_>>(),
[(20, 2.0), (40, 4.0), (60, 6.0)]
);
// Insert at back of leaf.
let mut m = full_leaf(f);
m.insert(80, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(80, f, &()), Some(4.2));
// Insert before middle (40).
let mut m = full_leaf(f);
m.insert(35, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(35, f, &()), Some(4.2));
// Insert after middle (40).
let mut m = full_leaf(f);
m.insert(45, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(45, f, &()), Some(4.2));
m.clear(f);
assert!(m.is_empty());
}
#[test]
fn split_level1_leaf() {
// Various ways of splitting a full leaf node at level 1.
let f = &mut MapForest::<u32, f32>::new();
// Return a map whose root node is a full inner node, and the leaf nodes are all full
// containing:
//
// 110, 120, ..., 170
// 210, 220, ..., 270
// ...
// 810, 820, ..., 870
fn full(f: &mut MapForest<u32, f32>) -> Map<u32, f32> {
let mut m = Map::new();
// Start by inserting elements in order.
// This should leave 8 leaf nodes with 4 elements in each.
for row in 1..9 {
for col in 1..5 {
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
}
}
// Then top up the leaf nodes without splitting them.
for row in 1..9 {
for col in 5..8 {
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
}
}
m
}
let mut m = full(f);
// Verify geometry. Get get node2 as the root and leaves node0, 1, 3, ...
m.verify(f, &());
assert_eq!(m.tpath(110, f, &()), "node2[0]--node0[0]");
assert_eq!(m.tpath(140, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(210, f, &()), "node2[1]--node1[0]");
assert_eq!(m.tpath(270, f, &()), "node2[1]--node1[6]");
assert_eq!(m.tpath(310, f, &()), "node2[2]--node3[0]");
assert_eq!(m.tpath(810, f, &()), "node2[7]--node8[0]");
assert_eq!(m.tpath(870, f, &()), "node2[7]--node8[6]");
{
let mut c = m.cursor(f, &());
assert_eq!(c.goto_first(), Some(1.1));
assert_eq!(c.key(), Some(110));
}
// Front of first leaf.
m.insert(0, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(0, f, &()), Some(4.2));
// First leaf split 4-4 after appending to LHS.
f.clear();
m = full(f);
m.insert(135, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(135, f, &()), Some(4.2));
// First leaf split 4-4 after prepending to RHS.
f.clear();
m = full(f);
m.insert(145, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(145, f, &()), Some(4.2));
// First leaf split 4-4 after appending to RHS.
f.clear();
m = full(f);
m.insert(175, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(175, f, &()), Some(4.2));
// Left-middle leaf split, ins LHS.
f.clear();
m = full(f);
m.insert(435, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(435, f, &()), Some(4.2));
// Left-middle leaf split, ins RHS.
f.clear();
m = full(f);
m.insert(445, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(445, f, &()), Some(4.2));
// Right-middle leaf split, ins LHS.
f.clear();
m = full(f);
m.insert(535, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(535, f, &()), Some(4.2));
// Right-middle leaf split, ins RHS.
f.clear();
m = full(f);
m.insert(545, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(545, f, &()), Some(4.2));
// Last leaf split, ins LHS.
f.clear();
m = full(f);
m.insert(835, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(835, f, &()), Some(4.2));
// Last leaf split, ins RHS.
f.clear();
m = full(f);
m.insert(845, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(845, f, &()), Some(4.2));
// Front of last leaf.
f.clear();
m = full(f);
m.insert(805, 4.2, f, &());
m.verify(f, &());
assert_eq!(m.get(805, f, &()), Some(4.2));
m.clear(f);
m.verify(f, &());
}
// Make a tree with two barely healthy leaf nodes:
// [ 10 20 30 40 ] [ 50 60 70 80 ]
fn two_leaf(f: &mut MapForest<u32, f32>) -> Map<u32, f32> {
f.clear();
let mut m = Map::new();
for n in 1..9 {
m.insert(n * 10, n as f32, f, &());
}
m
}
#[test]
fn remove_level1() {
let f = &mut MapForest::<u32, f32>::new();
let mut m = two_leaf(f);
// Verify geometry.
m.verify(f, &());
assert_eq!(m.tpath(10, f, &()), "node2[0]--node0[0]");
assert_eq!(m.tpath(40, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(50, f, &()), "node2[1]--node1[0]");
assert_eq!(m.tpath(80, f, &()), "node2[1]--node1[3]");
// Remove the front entry from a node that stays healthy.
assert_eq!(m.insert(55, 5.5, f, &()), None);
assert_eq!(m.remove(50, f, &()), Some(5.0));
m.verify(f, &());
assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(55, f, &()), "node2[1]--node1[0]");
// Remove the front entry from the first leaf node: No critical key to update.
assert_eq!(m.insert(15, 1.5, f, &()), None);
assert_eq!(m.remove(10, f, &()), Some(1.0));
m.verify(f, &());
// [ 15 20 30 40 ] [ 55 60 70 80 ]
// Remove the front entry from a right-most node that underflows.
// No rebalancing for the right-most node. Still need critical key update.
assert_eq!(m.remove(55, f, &()), Some(5.5));
m.verify(f, &());
assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");
// [ 15 20 30 40 ] [ 60 70 80 ]
// Replenish the right leaf.
assert_eq!(m.insert(90, 9.0, f, &()), None);
assert_eq!(m.insert(100, 10.0, f, &()), None);
m.verify(f, &());
assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");
// [ 15 20 30 40 ] [ 60 70 80 90 100 ]
// Removing one entry from the left leaf should trigger a rebalancing from the right
// sibling.
assert_eq!(m.remove(20, f, &()), Some(2.0));
m.verify(f, &());
// [ 15 30 40 60 ] [ 70 80 90 100 ]
// Check that the critical key was updated correctly.
assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(60, f, &()), "node2[0]--node0[3]");
assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");
// Remove front entry from the left-most leaf node, underflowing.
// This should cause two leaf nodes to be merged and the root node to go away.
assert_eq!(m.remove(15, f, &()), Some(1.5));
m.verify(f, &());
}
#[test]
fn remove_level1_rightmost() {
let f = &mut MapForest::<u32, f32>::new();
let mut m = two_leaf(f);
// [ 10 20 30 40 ] [ 50 60 70 80 ]
// Remove entries from the right leaf. This doesn't trigger a rebalancing.
assert_eq!(m.remove(60, f, &()), Some(6.0));
assert_eq!(m.remove(80, f, &()), Some(8.0));
assert_eq!(m.remove(50, f, &()), Some(5.0));
m.verify(f, &());
// [ 10 20 30 40 ] [ 70 ]
assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");
// Removing the last entry from the right leaf should cause a collapse.
assert_eq!(m.remove(70, f, &()), Some(7.0));
m.verify(f, &());
}
// Make a 3-level tree with barely healthy nodes.
// 1 root, 8 inner nodes, 7*4+5=33 leaf nodes, 4 entries each.
fn level3_sparse(f: &mut MapForest<u32, f32>) -> Map<u32, f32> {
f.clear();
let mut m = Map::new();
for n in 1..133 {
m.insert(n * 10, n as f32, f, &());
}
m
}
#[test]
fn level3_removes() {
let f = &mut MapForest::<u32, f32>::new();
let mut m = level3_sparse(f);
m.verify(f, &());
// Check geometry.
// Root: node11
// [ node2 170 node10 330 node16 490 node21 650 node26 810 node31 970 node36 1130 node41 ]
// L1: node11
assert_eq!(m.tpath(0, f, &()), "node11[0]--node2[0]--node0[0]");
assert_eq!(m.tpath(10000, f, &()), "node11[7]--node41[4]--node40[4]");
// 650 is a critical key in the middle of the root.
assert_eq!(m.tpath(640, f, &()), "node11[3]--node21[3]--node19[3]");
assert_eq!(m.tpath(650, f, &()), "node11[4]--node26[0]--node20[0]");
// Deleting 640 triggers a rebalance from node19 to node 20, cascading to n21 -> n26.
assert_eq!(m.remove(640, f, &()), Some(64.0));
m.verify(f, &());
assert_eq!(m.tpath(650, f, &()), "node11[3]--node26[3]--node20[3]");
// 1130 is in the first leaf of the last L1 node. Deleting it triggers a rebalance node35
// -> node37, but no rebalance above where there is no right sibling.
assert_eq!(m.tpath(1130, f, &()), "node11[6]--node41[0]--node35[0]");
assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node35[1]");
assert_eq!(m.remove(1130, f, &()), Some(113.0));
m.verify(f, &());
assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node37[0]");
}
#[test]
fn insert_many() {
let f = &mut MapForest::<u32, f32>::new();
let mut m = Map::<u32, f32>::new();
let mm = 4096;
let mut x = 0;
for n in 0..mm {
assert_eq!(m.insert(x, n as f32, f, &()), None);
m.verify(f, &());
x = (x + n + 1) % mm;
}
x = 0;
for n in 0..mm {
assert_eq!(m.get(x, f, &()), Some(n as f32));
x = (x + n + 1) % mm;
}
x = 0;
for n in 0..mm {
assert_eq!(m.remove(x, f, &()), Some(n as f32));
m.verify(f, &());
x = (x + n + 1) % mm;
}
assert!(m.is_empty());
}
}

View File

@@ -0,0 +1,805 @@
//! B+-tree nodes.
use super::{slice_insert, slice_shift, Forest, Node, SetValue, INNER_SIZE};
use core::borrow::{Borrow, BorrowMut};
use core::fmt;
/// B+-tree node.
///
/// A B+-tree has different node types for inner nodes and leaf nodes. Inner nodes contain M node
/// references and M-1 keys while leaf nodes contain N keys and values. Values for M and N are
/// chosen such that a node is exactly 64 bytes (a cache line) when keys and values are 32 bits
/// each.
///
/// An inner node contains at least M/2 node references unless it is the right-most node at its
/// level. A leaf node contains at least N/2 keys unless it is the right-most leaf.
pub(super) enum NodeData<F: Forest> {
Inner {
/// The number of keys in this node.
/// The number of node references is always one more.
size: u8,
/// Keys discriminating sub-trees.
///
/// The key in `keys[i]` is greater than all keys in `tree[i]` and less than or equal to
/// all keys in `tree[i+1]`.
keys: [F::Key; INNER_SIZE - 1],
/// Sub-trees.
tree: [Node; INNER_SIZE],
},
Leaf {
/// Number of key-value pairs in this node.
size: u8,
// Key array.
keys: F::LeafKeys,
// Value array.
vals: F::LeafValues,
},
/// An unused node on the free list.
Free { next: Option<Node> },
}
// Implement `Clone` and `Copy` manually, because deriving them would also require `Forest` to
// implement `Clone`.
impl<F: Forest> Copy for NodeData<F> {}
impl<F: Forest> Clone for NodeData<F> {
fn clone(&self) -> Self {
*self
}
}
impl<F: Forest> NodeData<F> {
/// Is this a free/unused node?
pub fn is_free(&self) -> bool {
match *self {
NodeData::Free { .. } => true,
_ => false,
}
}
/// Get the number of entries in this node.
///
/// This is the number of outgoing edges in an inner node, or the number of key-value pairs in
/// a leaf node.
pub fn entries(&self) -> usize {
match *self {
NodeData::Inner { size, .. } => usize::from(size) + 1,
NodeData::Leaf { size, .. } => usize::from(size),
NodeData::Free { .. } => panic!("freed node"),
}
}
/// Create an inner node with a single key and two sub-trees.
pub fn inner(left: Node, key: F::Key, right: Node) -> Self {
// Splat the key and right node to the whole array.
// Saves us from inventing a default/reserved value.
let mut tree = [right; INNER_SIZE];
tree[0] = left;
NodeData::Inner {
size: 1,
keys: [key; INNER_SIZE - 1],
tree,
}
}
/// Create a leaf node with a single key-value pair.
pub fn leaf(key: F::Key, value: F::Value) -> Self {
NodeData::Leaf {
size: 1,
keys: F::splat_key(key),
vals: F::splat_value(value),
}
}
/// Unwrap an inner node into two slices (keys, trees).
pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) {
match *self {
NodeData::Inner {
size,
ref keys,
ref tree,
} => {
let size = usize::from(size);
// TODO: We could probably use `get_unchecked()` here since `size` is always in
// range.
(&keys[0..size], &tree[0..=size])
}
_ => panic!("Expected inner node"),
}
}
/// Unwrap a leaf node into two slices (keys, values) of the same length.
pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) {
match *self {
NodeData::Leaf {
size,
ref keys,
ref vals,
} => {
let size = usize::from(size);
let keys = keys.borrow();
let vals = vals.borrow();
// TODO: We could probably use `get_unchecked()` here since `size` is always in
// range.
(&keys[0..size], &vals[0..size])
}
_ => panic!("Expected leaf node"),
}
}
/// Unwrap a mutable leaf node into two slices (keys, values) of the same length.
pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) {
match *self {
NodeData::Leaf {
size,
ref mut keys,
ref mut vals,
} => {
let size = usize::from(size);
let keys = keys.borrow_mut();
let vals = vals.borrow_mut();
// TODO: We could probably use `get_unchecked_mut()` here since `size` is always in
// range.
(&mut keys[0..size], &mut vals[0..size])
}
_ => panic!("Expected leaf node"),
}
}
/// Get the critical key for a leaf node.
/// This is simply the first key.
pub fn leaf_crit_key(&self) -> F::Key {
match *self {
NodeData::Leaf { size, ref keys, .. } => {
debug_assert!(size > 0, "Empty leaf node");
keys.borrow()[0]
}
_ => panic!("Expected leaf node"),
}
}
/// Try to insert `(key, node)` at key-position `index` in an inner node.
/// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`.
/// If the node is full, this leaves the node unchanged and returns false.
pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool {
match *self {
NodeData::Inner {
ref mut size,
ref mut keys,
ref mut tree,
} => {
let sz = usize::from(*size);
debug_assert!(sz <= keys.len());
debug_assert!(index <= sz, "Can't insert at {} with {} keys", index, sz);
if let Some(ks) = keys.get_mut(0..=sz) {
*size = (sz + 1) as u8;
slice_insert(ks, index, key);
slice_insert(&mut tree[1..=sz + 1], index, node);
true
} else {
false
}
}
_ => panic!("Expected inner node"),
}
}
/// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node
/// is full.
pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool {
match *self {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
} => {
let sz = usize::from(*size);
let keys = keys.borrow_mut();
let vals = vals.borrow_mut();
debug_assert!(sz <= keys.len());
debug_assert!(index <= sz);
if let Some(ks) = keys.get_mut(0..=sz) {
*size = (sz + 1) as u8;
slice_insert(ks, index, key);
slice_insert(&mut vals[0..=sz], index, value);
true
} else {
false
}
}
_ => panic!("Expected leaf node"),
}
}
/// Split off the second half of this node.
/// It is assumed that this a completely full inner or leaf node.
///
/// The `insert_index` parameter is the position where an insertion was tried and failed. The
/// node will be split in half with a bias towards an even split after the insertion is retried.
pub fn split(&mut self, insert_index: usize) -> SplitOff<F> {
match *self {
NodeData::Inner {
ref mut size,
ref keys,
ref tree,
} => {
debug_assert_eq!(usize::from(*size), keys.len(), "Node not full");
// Number of tree entries in the lhs node.
let l_ents = split_pos(tree.len(), insert_index + 1);
let r_ents = tree.len() - l_ents;
// With INNER_SIZE=8, we get l_ents=4 and:
//
// self: [ n0 k0 n1 k1 n2 k2 n3 k3 n4 k4 n5 k5 n6 k6 n7 ]
// lhs: [ n0 k0 n1 k1 n2 k2 n3 ]
// crit_key = k3 (not present in either node)
// rhs: [ n4 k4 n5 k5 n6 k6 n7 ]
// 1. Truncate the LHS.
*size = (l_ents - 1) as u8;
// 2. Copy second half to `rhs_data`.
let mut r_keys = *keys;
r_keys[0..r_ents - 1].copy_from_slice(&keys[l_ents..]);
let mut r_tree = *tree;
r_tree[0..r_ents].copy_from_slice(&tree[l_ents..]);
SplitOff {
lhs_entries: l_ents,
rhs_entries: r_ents,
crit_key: keys[l_ents - 1],
rhs_data: NodeData::Inner {
size: (r_ents - 1) as u8,
keys: r_keys,
tree: r_tree,
},
}
}
NodeData::Leaf {
ref mut size,
ref keys,
ref vals,
} => {
let o_keys = keys.borrow();
let o_vals = vals.borrow();
debug_assert_eq!(usize::from(*size), o_keys.len(), "Node not full");
let l_size = split_pos(o_keys.len(), insert_index);
let r_size = o_keys.len() - l_size;
// 1. Truncate the LHS node at `l_size`.
*size = l_size as u8;
// 2. Copy second half to `rhs_data`.
let mut r_keys = *keys;
r_keys.borrow_mut()[0..r_size].copy_from_slice(&o_keys[l_size..]);
let mut r_vals = *vals;
r_vals.borrow_mut()[0..r_size].copy_from_slice(&o_vals[l_size..]);
SplitOff {
lhs_entries: l_size,
rhs_entries: r_size,
crit_key: o_keys[l_size],
rhs_data: NodeData::Leaf {
size: r_size as u8,
keys: r_keys,
vals: r_vals,
},
}
}
_ => panic!("Expected leaf node"),
}
}
/// Remove the sub-tree at `index` from this inner node.
///
/// Note that `index` refers to a sub-tree entry and not a key entry as it does for
/// `try_inner_insert()`. It is possible to remove the first sub-tree (which can't be inserted
/// by `try_inner_insert()`).
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn inner_remove(&mut self, index: usize) -> Removed {
match *self {
NodeData::Inner {
ref mut size,
ref mut keys,
ref mut tree,
} => {
let ents = usize::from(*size) + 1;
debug_assert!(ents <= tree.len());
debug_assert!(index < ents);
// Leave an invalid 0xff size when node becomes empty.
*size = ents.wrapping_sub(2) as u8;
if ents > 1 {
slice_shift(&mut keys[index.saturating_sub(1)..ents - 1], 1);
}
slice_shift(&mut tree[index..ents], 1);
Removed::new(index, ents - 1, tree.len())
}
_ => panic!("Expected inner node"),
}
}
/// Remove the key-value pair at `index` from this leaf node.
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn leaf_remove(&mut self, index: usize) -> Removed {
match *self {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
} => {
let sz = usize::from(*size);
let keys = keys.borrow_mut();
let vals = vals.borrow_mut();
*size -= 1;
slice_shift(&mut keys[index..sz], 1);
slice_shift(&mut vals[index..sz], 1);
Removed::new(index, sz - 1, keys.len())
}
_ => panic!("Expected leaf node"),
}
}
/// Balance this node with its right sibling.
///
/// It is assumed that the current node has underflowed. Look at the right sibling node and do
/// one of two things:
///
/// 1. Move all entries to the right node, leaving this node empty, or
/// 2. Distribute entries evenly between the two nodes.
///
/// In the first case, `None` is returned. In the second case, the new critical key for the
/// right sibling node is returned.
pub fn balance(&mut self, crit_key: F::Key, rhs: &mut Self) -> Option<F::Key> {
match (self, rhs) {
(
&mut NodeData::Inner {
size: ref mut l_size,
keys: ref mut l_keys,
tree: ref mut l_tree,
},
&mut NodeData::Inner {
size: ref mut r_size,
keys: ref mut r_keys,
tree: ref mut r_tree,
},
) => {
let l_ents = usize::from(*l_size) + 1;
let r_ents = usize::from(*r_size) + 1;
let ents = l_ents + r_ents;
if ents <= r_tree.len() {
// All entries will fit in the RHS node.
// We'll leave the LHS node empty, but first use it as a scratch space.
*l_size = 0;
// Insert `crit_key` between the two nodes.
l_keys[l_ents - 1] = crit_key;
l_keys[l_ents..ents - 1].copy_from_slice(&r_keys[0..r_ents - 1]);
r_keys[0..ents - 1].copy_from_slice(&l_keys[0..ents - 1]);
l_tree[l_ents..ents].copy_from_slice(&r_tree[0..r_ents]);
r_tree[0..ents].copy_from_slice(&l_tree[0..ents]);
*r_size = (ents - 1) as u8;
None
} else {
// The entries don't all fit in one node. Distribute some from RHS -> LHS.
// Split evenly with a bias to putting one entry in LHS.
let r_goal = ents / 2;
let l_goal = ents - r_goal;
debug_assert!(l_goal > l_ents, "Node must be underflowed");
l_keys[l_ents - 1] = crit_key;
l_keys[l_ents..l_goal - 1].copy_from_slice(&r_keys[0..l_goal - 1 - l_ents]);
l_tree[l_ents..l_goal].copy_from_slice(&r_tree[0..l_goal - l_ents]);
*l_size = (l_goal - 1) as u8;
let new_crit = r_keys[r_ents - r_goal - 1];
slice_shift(&mut r_keys[0..r_ents - 1], r_ents - r_goal);
slice_shift(&mut r_tree[0..r_ents], r_ents - r_goal);
*r_size = (r_goal - 1) as u8;
Some(new_crit)
}
}
(
&mut NodeData::Leaf {
size: ref mut l_size,
keys: ref mut l_keys,
vals: ref mut l_vals,
},
&mut NodeData::Leaf {
size: ref mut r_size,
keys: ref mut r_keys,
vals: ref mut r_vals,
},
) => {
let l_ents = usize::from(*l_size);
let l_keys = l_keys.borrow_mut();
let l_vals = l_vals.borrow_mut();
let r_ents = usize::from(*r_size);
let r_keys = r_keys.borrow_mut();
let r_vals = r_vals.borrow_mut();
let ents = l_ents + r_ents;
if ents <= r_vals.len() {
// We can fit all entries in the RHS node.
// We'll leave the LHS node empty, but first use it as a scratch space.
*l_size = 0;
l_keys[l_ents..ents].copy_from_slice(&r_keys[0..r_ents]);
r_keys[0..ents].copy_from_slice(&l_keys[0..ents]);
l_vals[l_ents..ents].copy_from_slice(&r_vals[0..r_ents]);
r_vals[0..ents].copy_from_slice(&l_vals[0..ents]);
*r_size = ents as u8;
None
} else {
// The entries don't all fit in one node. Distribute some from RHS -> LHS.
// Split evenly with a bias to putting one entry in LHS.
let r_goal = ents / 2;
let l_goal = ents - r_goal;
debug_assert!(l_goal > l_ents, "Node must be underflowed");
l_keys[l_ents..l_goal].copy_from_slice(&r_keys[0..l_goal - l_ents]);
l_vals[l_ents..l_goal].copy_from_slice(&r_vals[0..l_goal - l_ents]);
*l_size = l_goal as u8;
slice_shift(&mut r_keys[0..r_ents], r_ents - r_goal);
slice_shift(&mut r_vals[0..r_ents], r_ents - r_goal);
*r_size = r_goal as u8;
Some(r_keys[0])
}
}
_ => panic!("Mismatched nodes"),
}
}
}
/// Find the right split position for halving a full node with `len` entries to recover from a
/// failed insertion at `ins`.
///
/// If `len` is even, we should split straight down the middle regardless of `len`.
///
/// If `len` is odd, we should split the node such that the two halves are the same size after the
/// insertion is retried.
fn split_pos(len: usize, ins: usize) -> usize {
// Anticipate `len` being a compile time constant, so this all folds away when `len` is even.
if ins <= len / 2 {
len / 2
} else {
(len + 1) / 2
}
}
/// The result of splitting off the second half of a node.
pub(super) struct SplitOff<F: Forest> {
/// The number of entries left in the original node which becomes the left-hand-side of the
/// pair. This is the number of outgoing node edges for an inner node, and the number of
/// key-value pairs for a leaf node.
pub lhs_entries: usize,
/// The number of entries in the new RHS node.
pub rhs_entries: usize,
/// The critical key separating the LHS and RHS nodes. All keys in the LHS sub-tree are less
/// than the critical key, and all entries in the RHS sub-tree are greater or equal to the
/// critical key.
pub crit_key: F::Key,
/// The RHS node data containing the elements that were removed from the original node (now the
/// LHS).
pub rhs_data: NodeData<F>,
}
/// The result of removing an entry from a node.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(super) enum Removed {
/// An entry was removed, and the node is still in good shape.
Healthy,
/// The node is in good shape after removing the rightmost element.
Rightmost,
/// The node has too few entries now, and it should be balanced with a sibling node.
Underflow,
/// The last entry was removed. For an inner node, this means that the `keys` array is empty
/// and there is just a single sub-tree left.
Empty,
}
impl Removed {
/// Create a `Removed` status from a size and capacity.
fn new(removed: usize, new_size: usize, capacity: usize) -> Self {
if 2 * new_size >= capacity {
if removed == new_size {
Removed::Rightmost
} else {
Removed::Healthy
}
} else if new_size > 0 {
Removed::Underflow
} else {
Removed::Empty
}
}
}
// Display ": value" or nothing at all for `()`.
pub(super) trait ValDisp {
fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result;
}
impl ValDisp for SetValue {
fn valfmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
Ok(())
}
}
impl<T: fmt::Display> ValDisp for T {
fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, ":{}", self)
}
}
impl<F> fmt::Display for NodeData<F>
where
F: Forest,
F::Key: fmt::Display,
F::Value: ValDisp,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
NodeData::Inner { size, keys, tree } => {
write!(f, "[ {}", tree[0])?;
for i in 0..usize::from(size) {
write!(f, " {} {}", keys[i], tree[i + 1])?;
}
write!(f, " ]")
}
NodeData::Leaf { size, keys, vals } => {
let keys = keys.borrow();
let vals = vals.borrow();
write!(f, "[")?;
for i in 0..usize::from(size) {
write!(f, " {}", keys[i])?;
vals[i].valfmt(f)?;
}
write!(f, " ]")
}
NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
NodeData::Free { next: None } => write!(f, "[ free ]"),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::mem;
use std::string::ToString;
// Forest impl for a set implementation.
struct TF();
impl Forest for TF {
type Key = char;
type Value = SetValue;
type LeafKeys = [char; 15];
type LeafValues = [SetValue; 15];
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; 15]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; 15]
}
}
#[test]
fn inner() {
let n1 = Node(1);
let n2 = Node(2);
let n3 = Node(3);
let n4 = Node(4);
let mut inner = NodeData::<TF>::inner(n1, 'c', n4);
assert_eq!(mem::size_of_val(&inner), 64);
assert_eq!(inner.to_string(), "[ node1 c node4 ]");
assert!(inner.try_inner_insert(0, 'a', n2));
assert_eq!(inner.to_string(), "[ node1 a node2 c node4 ]");
assert!(inner.try_inner_insert(1, 'b', n3));
assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
for i in 3..7 {
assert!(inner.try_inner_insert(
usize::from(i),
('a' as u8 + i) as char,
Node(i as u32 + 2),
));
}
assert_eq!(
inner.to_string(),
"[ node1 a node2 b node3 c node4 d node5 e node6 f node7 g node8 ]"
);
// Now the node is full and insertion should fail anywhere.
assert!(!inner.try_inner_insert(0, 'x', n3));
assert!(!inner.try_inner_insert(4, 'x', n3));
assert!(!inner.try_inner_insert(7, 'x', n3));
// Splitting should be independent of the hint because we have an even number of node
// references.
let saved = inner.clone();
let sp = inner.split(1);
assert_eq!(sp.lhs_entries, 4);
assert_eq!(sp.rhs_entries, 4);
assert_eq!(sp.crit_key, 'd');
// The critical key is not present in either of the resulting nodes.
assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");
assert_eq!(inner.inner_remove(0), Removed::Underflow);
assert_eq!(inner.to_string(), "[ node2 b node3 c node4 ]");
assert_eq!(inner.inner_remove(1), Removed::Underflow);
assert_eq!(inner.to_string(), "[ node2 c node4 ]");
assert_eq!(inner.inner_remove(1), Removed::Underflow);
assert_eq!(inner.to_string(), "[ node2 ]");
assert_eq!(inner.inner_remove(0), Removed::Empty);
inner = saved;
let sp = inner.split(6);
assert_eq!(sp.lhs_entries, 4);
assert_eq!(sp.rhs_entries, 4);
assert_eq!(sp.crit_key, 'd');
assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");
}
#[test]
fn leaf() {
let mut leaf = NodeData::<TF>::leaf('d', SetValue());
assert_eq!(leaf.to_string(), "[ d ]");
assert!(leaf.try_leaf_insert(0, 'a', SetValue()));
assert_eq!(leaf.to_string(), "[ a d ]");
assert!(leaf.try_leaf_insert(1, 'b', SetValue()));
assert!(leaf.try_leaf_insert(2, 'c', SetValue()));
assert_eq!(leaf.to_string(), "[ a b c d ]");
for i in 4..15 {
assert!(leaf.try_leaf_insert(usize::from(i), ('a' as u8 + i) as char, SetValue()));
}
assert_eq!(leaf.to_string(), "[ a b c d e f g h i j k l m n o ]");
// Now the node is full and insertion should fail anywhere.
assert!(!leaf.try_leaf_insert(0, 'x', SetValue()));
assert!(!leaf.try_leaf_insert(8, 'x', SetValue()));
assert!(!leaf.try_leaf_insert(15, 'x', SetValue()));
// The index given to `split` is not the split position, it's a hint for balancing the node.
let saved = leaf.clone();
let sp = leaf.split(12);
assert_eq!(sp.lhs_entries, 8);
assert_eq!(sp.rhs_entries, 7);
assert_eq!(sp.crit_key, 'i');
assert_eq!(leaf.to_string(), "[ a b c d e f g h ]");
assert_eq!(sp.rhs_data.to_string(), "[ i j k l m n o ]");
assert!(leaf.try_leaf_insert(8, 'i', SetValue()));
assert_eq!(leaf.leaf_remove(2), Removed::Healthy);
assert_eq!(leaf.to_string(), "[ a b d e f g h i ]");
assert_eq!(leaf.leaf_remove(7), Removed::Underflow);
assert_eq!(leaf.to_string(), "[ a b d e f g h ]");
leaf = saved;
let sp = leaf.split(7);
assert_eq!(sp.lhs_entries, 7);
assert_eq!(sp.rhs_entries, 8);
assert_eq!(sp.crit_key, 'h');
assert_eq!(leaf.to_string(), "[ a b c d e f g ]");
assert_eq!(sp.rhs_data.to_string(), "[ h i j k l m n o ]");
}
#[test]
fn optimal_split_pos() {
// An even split is easy.
assert_eq!(split_pos(8, 0), 4);
assert_eq!(split_pos(8, 8), 4);
// Easy cases for odd splits.
assert_eq!(split_pos(7, 0), 3);
assert_eq!(split_pos(7, 7), 4);
// If the insertion point is the same as the split position, we
// will append to the lhs node.
assert_eq!(split_pos(7, 3), 3);
assert_eq!(split_pos(7, 4), 4);
}
#[test]
fn inner_balance() {
let n1 = Node(1);
let n2 = Node(2);
let n3 = Node(3);
let mut lhs = NodeData::<TF>::inner(n1, 'a', n2);
assert!(lhs.try_inner_insert(1, 'b', n3));
assert_eq!(lhs.to_string(), "[ node1 a node2 b node3 ]");
let n11 = Node(11);
let n12 = Node(12);
let mut rhs = NodeData::<TF>::inner(n11, 'p', n12);
for i in 1..4 {
assert!(rhs.try_inner_insert(
usize::from(i),
('p' as u8 + i) as char,
Node(i as u32 + 12),
));
}
assert_eq!(
rhs.to_string(),
"[ node11 p node12 q node13 r node14 s node15 ]"
);
// 3+5 elements fit in RHS.
assert_eq!(lhs.balance('o', &mut rhs), None);
assert_eq!(
rhs.to_string(),
"[ node1 a node2 b node3 o node11 p node12 q node13 r node14 s node15 ]"
);
// 2+8 elements are redistributed.
lhs = NodeData::<TF>::inner(Node(20), 'x', Node(21));
assert_eq!(lhs.balance('y', &mut rhs), Some('o'));
assert_eq!(
lhs.to_string(),
"[ node20 x node21 y node1 a node2 b node3 ]"
);
assert_eq!(
rhs.to_string(),
"[ node11 p node12 q node13 r node14 s node15 ]"
);
}
#[test]
fn leaf_balance() {
let mut lhs = NodeData::<TF>::leaf('a', SetValue());
for i in 1..6 {
assert!(lhs.try_leaf_insert(usize::from(i), ('a' as u8 + i) as char, SetValue()));
}
assert_eq!(lhs.to_string(), "[ a b c d e f ]");
let mut rhs = NodeData::<TF>::leaf('0', SetValue());
for i in 1..8 {
assert!(rhs.try_leaf_insert(usize::from(i), ('0' as u8 + i) as char, SetValue()));
}
assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
// 6+8 elements all fits in rhs.
assert_eq!(lhs.balance('0', &mut rhs), None);
assert_eq!(rhs.to_string(), "[ a b c d e f 0 1 2 3 4 5 6 7 ]");
assert!(lhs.try_leaf_insert(0, 'x', SetValue()));
assert!(lhs.try_leaf_insert(1, 'y', SetValue()));
assert!(lhs.try_leaf_insert(2, 'z', SetValue()));
assert_eq!(lhs.to_string(), "[ x y z ]");
// 3+14 elements need redistribution.
assert_eq!(lhs.balance('a', &mut rhs), Some('0'));
assert_eq!(lhs.to_string(), "[ x y z a b c d e f ]");
assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
}
}

View File

@@ -0,0 +1,836 @@
//! A path from the root of a B+-tree to a leaf node.
use super::node::Removed;
use super::{slice_insert, slice_shift, Comparator, Forest, Node, NodeData, NodePool, MAX_PATH};
use core::borrow::Borrow;
use core::marker::PhantomData;
#[cfg(test)]
use core::fmt;
pub(super) struct Path<F: Forest> {
/// Number of path entries including the root and leaf nodes.
size: usize,
/// Path of node references from the root to a leaf node.
node: [Node; MAX_PATH],
/// Entry number in each node.
entry: [u8; MAX_PATH],
unused: PhantomData<F>,
}
impl<F: Forest> Default for Path<F> {
fn default() -> Self {
Self {
size: 0,
node: [Node(0); MAX_PATH],
entry: [0; MAX_PATH],
unused: PhantomData,
}
}
}
impl<F: Forest> Path<F> {
/// Reset path by searching for `key` starting from `root`.
///
/// If `key` is in the tree, returns the corresponding value and leaved the path pointing at
/// the entry. Otherwise returns `None` and:
///
/// - A key smaller than all stored keys returns a path to the first entry of the first leaf.
/// - A key larger than all stored keys returns a path to one beyond the last element of the
/// last leaf.
/// - A key between the stored keys of adjacent leaf nodes returns a path to one beyond the
/// last entry of the first of the leaf nodes.
///
pub fn find(
&mut self,
key: F::Key,
root: Node,
pool: &NodePool<F>,
comp: &Comparator<F::Key>,
) -> Option<F::Value> {
let mut node = root;
for level in 0.. {
self.size = level + 1;
self.node[level] = node;
match pool[node] {
NodeData::Inner { size, keys, tree } => {
// Invariant: `tree[i]` contains keys smaller than
// `keys[i]`, greater or equal to `keys[i-1]`.
let i = match comp.search(key, &keys[0..size.into()]) {
// We hit an existing key, so follow the >= branch.
Ok(i) => i + 1,
// Key is less than `keys[i]`, so follow the < branch.
Err(i) => i,
};
self.entry[level] = i as u8;
node = tree[i];
}
NodeData::Leaf { size, keys, vals } => {
// For a leaf we want either the found key or an insert position.
return match comp.search(key, &keys.borrow()[0..size.into()]) {
Ok(i) => {
self.entry[level] = i as u8;
Some(vals.borrow()[i])
}
Err(i) => {
self.entry[level] = i as u8;
None
}
};
}
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
unreachable!();
}
/// Move path to the first entry of the tree starting at `root` and return it.
pub fn first(&mut self, root: Node, pool: &NodePool<F>) -> (F::Key, F::Value) {
let mut node = root;
for level in 0.. {
self.size = level + 1;
self.node[level] = node;
self.entry[level] = 0;
match pool[node] {
NodeData::Inner { tree, .. } => node = tree[0],
NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
unreachable!();
}
/// Move this path to the next key-value pair and return it.
pub fn next(&mut self, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
match self.leaf_pos() {
None => return None,
Some((node, entry)) => {
let (keys, vals) = pool[node].unwrap_leaf();
if entry + 1 < keys.len() {
self.entry[self.size - 1] += 1;
return Some((keys[entry + 1], vals[entry + 1]));
}
}
}
// The current leaf node is exhausted. Move to the next one.
let leaf_level = self.size - 1;
self.next_node(leaf_level, pool).map(|node| {
let (keys, vals) = pool[node].unwrap_leaf();
(keys[0], vals[0])
})
}
/// Move this path to the previous key-value pair and return it.
///
/// If the path is at the off-the-end position, go to the last key-value pair.
///
/// If the path is already at the first key-value pair, leave it there and return `None`.
pub fn prev(&mut self, root: Node, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
// We use `size == 0` as a generic off-the-end position.
if self.size == 0 {
self.goto_subtree_last(0, root, pool);
let (node, entry) = self.leaf_pos().unwrap();
let (keys, vals) = pool[node].unwrap_leaf();
return Some((keys[entry], vals[entry]));
}
match self.leaf_pos() {
None => return None,
Some((node, entry)) => {
if entry > 0 {
self.entry[self.size - 1] -= 1;
let (keys, vals) = pool[node].unwrap_leaf();
return Some((keys[entry - 1], vals[entry - 1]));
}
}
}
// The current leaf node is exhausted. Move to the previous one.
self.prev_leaf(pool).map(|node| {
let (keys, vals) = pool[node].unwrap_leaf();
let e = self.leaf_entry();
(keys[e], vals[e])
})
}
/// Move path to the first entry of the next node at level, if one exists.
///
/// Returns the new node if it exists.
///
/// Reset the path to `size = 0` and return `None` if there is no next node.
fn next_node(&mut self, level: usize, pool: &NodePool<F>) -> Option<Node> {
match self.right_sibling_branch_level(level, pool) {
None => {
self.size = 0;
None
}
Some(bl) => {
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
self.entry[bl] += 1;
let mut node = bnodes[usize::from(self.entry[bl])];
for l in bl + 1..level {
self.node[l] = node;
self.entry[l] = 0;
node = pool[node].unwrap_inner().1[0];
}
self.node[level] = node;
self.entry[level] = 0;
Some(node)
}
}
}
/// Move the path to the last entry of the previous leaf node, if one exists.
///
/// Returns the new leaf node if it exists.
///
/// Leave the path unchanged and returns `None` if we are already at the first leaf node.
fn prev_leaf(&mut self, pool: &NodePool<F>) -> Option<Node> {
self.left_sibling_branch_level(self.size - 1).map(|bl| {
let entry = self.entry[bl] - 1;
self.entry[bl] = entry;
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
self.goto_subtree_last(bl + 1, bnodes[usize::from(entry)], pool)
})
}
/// Move this path to the last position for the sub-tree at `level, root`.
fn goto_subtree_last(&mut self, level: usize, root: Node, pool: &NodePool<F>) -> Node {
let mut node = root;
for l in level.. {
self.node[l] = node;
match pool[node] {
NodeData::Inner { size, ref tree, .. } => {
self.entry[l] = size;
node = tree[usize::from(size)];
}
NodeData::Leaf { size, .. } => {
self.entry[l] = size - 1;
self.size = l + 1;
break;
}
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
node
}
/// Set the root node and point the path at the first entry of the node.
pub fn set_root_node(&mut self, root: Node) {
self.size = 1;
self.node[0] = root;
self.entry[0] = 0;
}
/// Get the current leaf node and entry, if any.
pub fn leaf_pos(&self) -> Option<(Node, usize)> {
let i = self.size.wrapping_sub(1);
self.node.get(i).map(|&n| (n, self.entry[i].into()))
}
/// Get the current leaf node.
fn leaf_node(&self) -> Node {
self.node[self.size - 1]
}
/// Get the current entry in the leaf node.
fn leaf_entry(&self) -> usize {
self.entry[self.size - 1].into()
}
/// Is this path pointing to the first entry in the tree?
/// This corresponds to the smallest key.
fn at_first_entry(&self) -> bool {
self.entry[0..self.size].iter().all(|&i| i == 0)
}
/// Get a mutable reference to the current value.
/// This assumes that there is a current value.
pub fn value_mut<'a>(&self, pool: &'a mut NodePool<F>) -> &'a mut F::Value {
&mut pool[self.leaf_node()].unwrap_leaf_mut().1[self.leaf_entry()]
}
/// Insert the key-value pair at the current position.
/// The current position must be the correct insertion location for the key.
/// This function does not check for duplicate keys. Use `find` or similar for that.
/// Returns the new root node.
pub fn insert(&mut self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> Node {
if !self.try_leaf_insert(key, value, pool) {
self.split_and_insert(key, value, pool);
}
self.node[0]
}
/// Try to insert `key, value` at the current position, but fail and return false if the leaf
/// node is full.
fn try_leaf_insert(&self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> bool {
let index = self.leaf_entry();
// The case `index == 0` should only ever happen when there are no earlier leaf nodes,
// otherwise we should have appended to the previous leaf node instead. This invariant
// means that we don't need to update keys stored in inner nodes here.
debug_assert!(index > 0 || self.at_first_entry());
pool[self.leaf_node()].try_leaf_insert(index, key, value)
}
/// Split the current leaf node and then insert `key, value`.
/// This should only be used if `try_leaf_insert()` fails.
fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool<F>) {
let orig_root = self.node[0];
// Loop invariant: We need to split the node at `level` and then retry a failed insertion.
// The items to insert are either `(key, ins_node)` or `(key, value)`.
let mut ins_node = None;
let mut split;
for level in (0..self.size).rev() {
// Split the current node.
let mut node = self.node[level];
let mut entry = self.entry[level].into();
split = pool[node].split(entry);
let rhs_node = pool.alloc_node(split.rhs_data);
// Should the path be moved to the new RHS node?
// Prefer the smaller node if we're right in the middle.
// Prefer to append to LHS all other things being equal.
//
// When inserting into an inner node (`ins_node.is_some()`), we must point to a valid
// entry in the current node since the new entry is inserted *after* the insert
// location.
if entry > split.lhs_entries
|| (entry == split.lhs_entries
&& (split.lhs_entries > split.rhs_entries || ins_node.is_some()))
{
node = rhs_node;
entry -= split.lhs_entries;
self.node[level] = node;
self.entry[level] = entry as u8;
}
// Now that we have a not-full node, it must be possible to insert.
match ins_node {
None => {
let inserted = pool[node].try_leaf_insert(entry, key, value);
debug_assert!(inserted);
// If we inserted at the front of the new rhs_node leaf, we need to propagate
// the inserted key as the critical key instead of the previous front key.
if entry == 0 && node == rhs_node {
split.crit_key = key;
}
}
Some(n) => {
let inserted = pool[node].try_inner_insert(entry, key, n);
debug_assert!(inserted);
// The lower level was moved to the new RHS node, so make sure that is
// reflected here.
if n == self.node[level + 1] {
self.entry[level] += 1;
}
}
}
// We are now done with the current level, but `rhs_node` must be inserted in the inner
// node above us. If we're already at level 0, the root node needs to be split.
key = split.crit_key;
ins_node = Some(rhs_node);
if level > 0 {
let pnode = &mut pool[self.node[level - 1]];
let pentry = self.entry[level - 1].into();
if pnode.try_inner_insert(pentry, key, rhs_node) {
// If this level level was moved to the new RHS node, update parent entry.
if node == rhs_node {
self.entry[level - 1] += 1;
}
return;
}
}
}
// If we get here we have split the original root node and need to add an extra level.
let rhs_node = ins_node.expect("empty path");
let root = pool.alloc_node(NodeData::inner(orig_root, key, rhs_node));
let entry = if self.node[0] == rhs_node { 1 } else { 0 };
self.size += 1;
slice_insert(&mut self.node[0..self.size], 0, root);
slice_insert(&mut self.entry[0..self.size], 0, entry);
}
/// Remove the key-value pair at the current position and advance the path to the next
/// key-value pair, leaving the path in a normalized state.
///
/// Return the new root node.
pub fn remove(&mut self, pool: &mut NodePool<F>) -> Option<Node> {
let e = self.leaf_entry();
match pool[self.leaf_node()].leaf_remove(e) {
Removed::Healthy => {
if e == 0 {
self.update_crit_key(pool)
}
Some(self.node[0])
}
status => self.balance_nodes(status, pool),
}
}
/// Get the critical key for the current node at `level`.
///
/// The critical key is less than or equal to all keys in the sub-tree at `level` and greater
/// than all keys to the left of the current node at `level`.
///
/// The left-most node at any level does not have a critical key.
fn current_crit_key(&self, level: usize, pool: &NodePool<F>) -> Option<F::Key> {
// Find the level containing the critical key for the current node.
self.left_sibling_branch_level(level).map(|bl| {
let (keys, _) = pool[self.node[bl]].unwrap_inner();
keys[usize::from(self.entry[bl]) - 1]
})
}
/// Update the critical key after removing the front entry of the leaf node.
fn update_crit_key(&mut self, pool: &mut NodePool<F>) {
// Find the inner level containing the critical key for the current leaf node.
let crit_level = match self.left_sibling_branch_level(self.size - 1) {
None => return,
Some(l) => l,
};
let crit_kidx = self.entry[crit_level] - 1;
// Extract the new critical key from the leaf node.
let crit_key = pool[self.leaf_node()].leaf_crit_key();
let crit_node = self.node[crit_level];
match pool[crit_node] {
NodeData::Inner {
size, ref mut keys, ..
} => {
debug_assert!(crit_kidx < size);
keys[usize::from(crit_kidx)] = crit_key;
}
_ => panic!("Expected inner node"),
}
}
/// Given that the current leaf node is in an unhealthy (underflowed or even empty) status,
/// balance it with sibling nodes.
///
/// Return the new root node.
fn balance_nodes(&mut self, status: Removed, pool: &mut NodePool<F>) -> Option<Node> {
// The current leaf node is not in a healthy state, and its critical key may have changed
// too.
//
// Start by dealing with a changed critical key for the leaf level.
if status != Removed::Empty && self.leaf_entry() == 0 {
self.update_crit_key(pool);
}
let leaf_level = self.size - 1;
if self.heal_level(status, leaf_level, pool) {
// Tree has become empty.
self.size = 0;
return None;
}
// Discard the root node if it has shrunk to a single sub-tree.
let mut ns = 0;
while let NodeData::Inner {
size: 0, ref tree, ..
} = pool[self.node[ns]]
{
ns += 1;
self.node[ns] = tree[0];
}
if ns > 0 {
for l in 0..ns {
pool.free_node(self.node[l]);
}
// Shift the whole array instead of just 0..size because `self.size` may be cleared
// here if the path is pointing off-the-end.
slice_shift(&mut self.node, ns);
slice_shift(&mut self.entry, ns);
if self.size > 0 {
self.size -= ns;
}
}
// Return the root node, even when `size=0` indicating that we're at the off-the-end
// position.
Some(self.node[0])
}
/// After removing an entry from the node at `level`, check its health and rebalance as needed.
///
/// Leave the path up to and including `level` in a normalized state where all entries are in
/// bounds.
///
/// Returns true if the tree becomes empty.
fn heal_level(&mut self, status: Removed, level: usize, pool: &mut NodePool<F>) -> bool {
match status {
Removed::Healthy => {}
Removed::Rightmost => {
// The rightmost entry was removed from the current node, so move the path so it
// points at the first entry of the next node at this level.
debug_assert_eq!(
usize::from(self.entry[level]),
pool[self.node[level]].entries()
);
self.next_node(level, pool);
}
Removed::Underflow => self.underflowed_node(level, pool),
Removed::Empty => return self.empty_node(level, pool),
}
false
}
/// The current node at `level` has underflowed, meaning that it is below half capacity but
/// not completely empty.
///
/// Handle this by balancing entries with the right sibling node.
///
/// Leave the path up to and including `level` in a valid state that points to the same entry.
fn underflowed_node(&mut self, level: usize, pool: &mut NodePool<F>) {
// Look for a right sibling node at this level. If none exists, we allow the underflowed
// node to persist as the right-most node at its level.
if let Some((crit_key, rhs_node)) = self.right_sibling(level, pool) {
// New critical key for the updated right sibling node.
let new_ck: Option<F::Key>;
let empty;
// Make a COPY of the sibling node to avoid fighting the borrow checker.
let mut rhs = pool[rhs_node];
match pool[self.node[level]].balance(crit_key, &mut rhs) {
None => {
// Everything got moved to the RHS node.
new_ck = self.current_crit_key(level, pool);
empty = true;
}
Some(key) => {
// Entries moved from RHS node.
new_ck = Some(key);
empty = false;
}
}
// Put back the updated RHS node data.
pool[rhs_node] = rhs;
// Update the critical key for the RHS node unless it has become a left-most
// node.
if let Some(ck) = new_ck {
self.update_right_crit_key(level, ck, pool);
}
if empty {
let empty_tree = self.empty_node(level, pool);
debug_assert!(!empty_tree);
}
// Any Removed::Rightmost state must have been cleared above by merging nodes. If the
// current entry[level] was one off the end of the node, it will now point at a proper
// entry.
debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries());
} else if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
// There's no right sibling at this level, so the node can't be rebalanced.
// Check if we are in an off-the-end position.
self.size = 0;
}
}
/// The current node at `level` has become empty.
///
/// Remove the node from its parent node and leave the path in a normalized state. This means
/// that the path at this level will go through the right sibling of this node.
///
/// If the current node has no right sibling, set `self.size = 0`.
///
/// Returns true if the tree becomes empty.
fn empty_node(&mut self, level: usize, pool: &mut NodePool<F>) -> bool {
pool.free_node(self.node[level]);
if level == 0 {
// We just deleted the root node, so the tree is now empty.
return true;
}
// Get the right sibling node before recursively removing nodes.
let rhs_node = self.right_sibling(level, pool).map(|(_, n)| n);
// Remove the current sub-tree from the parent node.
let pl = level - 1;
let pe = self.entry[pl].into();
let status = pool[self.node[pl]].inner_remove(pe);
self.heal_level(status, pl, pool);
// Finally update the path at this level.
match rhs_node {
// We'll leave `self.entry[level]` unchanged. It can be non-zero after moving node
// entries to the right sibling node.
Some(rhs) => self.node[level] = rhs,
// We have no right sibling, so we must have deleted the right-most
// entry. The path should be moved to the "off-the-end" position.
None => self.size = 0,
}
false
}
/// Find the level where the right sibling to the current node at `level` branches off.
///
/// This will be an inner node with two adjacent sub-trees: In one the current node at level is
/// a right-most node, in the other, the right sibling is a left-most node.
///
/// Returns `None` if the current node is a right-most node so no right sibling exists.
fn right_sibling_branch_level(&self, level: usize, pool: &NodePool<F>) -> Option<usize> {
(0..level).rposition(|l| match pool[self.node[l]] {
NodeData::Inner { size, .. } => self.entry[l] < size,
_ => panic!("Expected inner node"),
})
}
/// Find the level where the left sibling to the current node at `level` branches off.
fn left_sibling_branch_level(&self, level: usize) -> Option<usize> {
self.entry[0..level].iter().rposition(|&e| e != 0)
}
/// Get the right sibling node to the current node at `level`.
/// Also return the critical key between the current node and the right sibling.
fn right_sibling(&self, level: usize, pool: &NodePool<F>) -> Option<(F::Key, Node)> {
// Find the critical level: The deepest level where two sibling subtrees contain the
// current node and its right sibling.
self.right_sibling_branch_level(level, pool).map(|bl| {
// Extract the critical key and the `bl+1` node.
let be = usize::from(self.entry[bl]);
let crit_key;
let mut node;
{
let (keys, tree) = pool[self.node[bl]].unwrap_inner();
crit_key = keys[be];
node = tree[be + 1];
}
// Follow left-most links back down to `level`.
for _ in bl + 1..level {
node = pool[node].unwrap_inner().1[0];
}
(crit_key, node)
})
}
/// Update the critical key for the right sibling node at `level`.
fn update_right_crit_key(&self, level: usize, crit_key: F::Key, pool: &mut NodePool<F>) {
let bl = self
.right_sibling_branch_level(level, pool)
.expect("No right sibling exists");
match pool[self.node[bl]] {
NodeData::Inner { ref mut keys, .. } => {
keys[usize::from(self.entry[bl])] = crit_key;
}
_ => panic!("Expected inner node"),
}
}
/// Normalize the path position such that it is either pointing at a real entry or `size=0`
/// indicating "off-the-end".
pub fn normalize(&mut self, pool: &mut NodePool<F>) {
if let Some((leaf, entry)) = self.leaf_pos() {
if entry >= pool[leaf].entries() {
let leaf_level = self.size - 1;
self.next_node(leaf_level, pool);
}
}
}
}
#[cfg(test)]
impl<F: Forest> Path<F> {
/// Check the internal consistency of this path.
pub fn verify(&self, pool: &NodePool<F>) {
for level in 0..self.size {
match pool[self.node[level]] {
NodeData::Inner { size, tree, .. } => {
assert!(
level < self.size - 1,
"Expected leaf node at level {}",
level
);
assert!(
self.entry[level] <= size,
"OOB inner entry {}/{} at level {}",
self.entry[level],
size,
level
);
assert_eq!(
self.node[level + 1],
tree[usize::from(self.entry[level])],
"Node mismatch at level {}",
level
);
}
NodeData::Leaf { size, .. } => {
assert_eq!(level, self.size - 1, "Expected inner node");
assert!(
self.entry[level] <= size,
"OOB leaf entry {}/{}",
self.entry[level],
size,
);
}
NodeData::Free { .. } => {
panic!("Free {} in path", self.node[level]);
}
}
}
}
}
#[cfg(test)]
impl<F: Forest> fmt::Display for Path<F> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.size == 0 {
write!(f, "<empty path>")
} else {
write!(f, "{}[{}]", self.node[0], self.entry[0])?;
for i in 1..self.size {
write!(f, "--{}[{}]", self.node[i], self.entry[i])?;
}
Ok(())
}
}
}
#[cfg(test)]
mod tests {
use super::super::{Forest, NodeData, NodePool};
use super::*;
use core::cmp::Ordering;
struct TC();
impl Comparator<i32> for TC {
fn cmp(&self, a: i32, b: i32) -> Ordering {
a.cmp(&b)
}
}
struct TF();
impl Forest for TF {
type Key = i32;
type Value = char;
type LeafKeys = [i32; 7];
type LeafValues = [char; 7];
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; 7]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; 7]
}
}
#[test]
fn search_single_leaf() {
// Testing Path::new() for trees with a single leaf node.
let mut pool = NodePool::<TF>::new();
let root = pool.alloc_node(NodeData::leaf(10, 'a'));
let mut p = Path::default();
let comp = TC();
// Search for key less than stored key.
assert_eq!(p.find(5, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
// Search for stored key.
assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
// Search for key greater than stored key.
assert_eq!(p.find(15, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
// Modify leaf node to contain two values.
match pool[root] {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
} => {
*size = 2;
keys[1] = 20;
vals[1] = 'b';
}
_ => unreachable!(),
}
// Search for key between stored keys.
assert_eq!(p.find(15, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
// Search for key greater than stored keys.
assert_eq!(p.find(25, root, &pool, &comp), None);
assert_eq!(p.size, 1);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 2);
}
#[test]
fn search_single_inner() {
// Testing Path::new() for trees with a single inner node and two leaves.
let mut pool = NodePool::<TF>::new();
let leaf1 = pool.alloc_node(NodeData::leaf(10, 'a'));
let leaf2 = pool.alloc_node(NodeData::leaf(20, 'b'));
let root = pool.alloc_node(NodeData::inner(leaf1, 20, leaf2));
let mut p = Path::default();
let comp = TC();
// Search for key less than stored keys.
assert_eq!(p.find(5, root, &pool, &comp), None);
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
assert_eq!(p.node[1], leaf1);
assert_eq!(p.entry[1], 0);
assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
assert_eq!(p.node[1], leaf1);
assert_eq!(p.entry[1], 0);
// Midway between the two leaf nodes.
assert_eq!(p.find(15, root, &pool, &comp), None);
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 0);
assert_eq!(p.node[1], leaf1);
assert_eq!(p.entry[1], 1);
assert_eq!(p.find(20, root, &pool, &comp), Some('b'));
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
assert_eq!(p.node[1], leaf2);
assert_eq!(p.entry[1], 0);
assert_eq!(p.find(25, root, &pool, &comp), None);
assert_eq!(p.size, 2);
assert_eq!(p.node[0], root);
assert_eq!(p.entry[0], 1);
assert_eq!(p.node[1], leaf2);
assert_eq!(p.entry[1], 1);
}
}

View File

@@ -0,0 +1,218 @@
//! B+-tree node pool.
#[cfg(test)]
use super::Comparator;
use super::{Forest, Node, NodeData};
use crate::entity::PrimaryMap;
#[cfg(test)]
use core::fmt;
use core::ops::{Index, IndexMut};
/// A pool of nodes, including a free list.
pub(super) struct NodePool<F: Forest> {
nodes: PrimaryMap<Node, NodeData<F>>,
freelist: Option<Node>,
}
impl<F: Forest> NodePool<F> {
/// Allocate a new empty pool of nodes.
pub fn new() -> Self {
Self {
nodes: PrimaryMap::new(),
freelist: None,
}
}
/// Free all nodes.
pub fn clear(&mut self) {
self.nodes.clear();
self.freelist = None;
}
/// Allocate a new node containing `data`.
pub fn alloc_node(&mut self, data: NodeData<F>) -> Node {
debug_assert!(!data.is_free(), "can't allocate free node");
match self.freelist {
Some(node) => {
// Remove this node from the free list.
match self.nodes[node] {
NodeData::Free { next } => self.freelist = next,
_ => panic!("Invalid {} on free list", node),
}
self.nodes[node] = data;
node
}
None => {
// The free list is empty. Allocate a new node.
self.nodes.push(data)
}
}
}
/// Free a node.
pub fn free_node(&mut self, node: Node) {
// Quick check for a double free.
debug_assert!(!self.nodes[node].is_free(), "{} is already free", node);
self.nodes[node] = NodeData::Free {
next: self.freelist,
};
self.freelist = Some(node);
}
/// Free the entire tree rooted at `node`.
pub fn free_tree(&mut self, node: Node) {
if let NodeData::Inner { size, tree, .. } = self[node] {
// Note that we have to capture `tree` by value to avoid borrow checker trouble.
#[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_range_loop))]
for i in 0..usize::from(size + 1) {
// Recursively free sub-trees. This recursion can never be deeper than `MAX_PATH`,
// and since most trees have less than a handful of nodes, it is worthwhile to
// avoid the heap allocation for an iterative tree traversal.
self.free_tree(tree[i]);
}
}
self.free_node(node);
}
}
#[cfg(test)]
impl<F: Forest> NodePool<F> {
/// Verify the consistency of the tree rooted at `node`.
pub fn verify_tree<C: Comparator<F::Key>>(&self, node: Node, comp: &C)
where
NodeData<F>: fmt::Display,
F::Key: fmt::Display,
{
use crate::entity::SparseSet;
use core::borrow::Borrow;
use core::cmp::Ordering;
use std::vec::Vec;
// The root node can't be an inner node with just a single sub-tree. It should have been
// pruned.
if let NodeData::Inner { size, .. } = self[node] {
assert!(size > 0, "Root must have more than one sub-tree");
}
let mut done = SparseSet::new();
let mut todo = Vec::new();
// Todo-list entries are:
// 1. Optional LHS key which must be <= all node entries.
// 2. The node reference.
// 3. Optional RHS key which must be > all node entries.
todo.push((None, node, None));
while let Some((lkey, node, rkey)) = todo.pop() {
assert_eq!(
done.insert(node),
None,
"Node appears more than once in tree"
);
let mut lower = lkey;
match self[node] {
NodeData::Inner { size, keys, tree } => {
let size = size as usize;
let capacity = tree.len();
let keys = &keys[0..size];
// Verify occupancy.
// Right-most nodes can be small, but others must be at least half full.
assert!(
rkey.is_none() || (size + 1) * 2 >= capacity,
"Only {}/{} entries in {}:{}, upper={}",
size + 1,
capacity,
node,
self[node],
rkey.unwrap()
);
// Queue up the sub-trees, checking for duplicates.
for i in 0..size + 1 {
// Get an upper bound for node[i].
let upper = keys.get(i).cloned().or(rkey);
// Check that keys are strictly monotonic.
if let (Some(a), Some(b)) = (lower, upper) {
assert_eq!(
comp.cmp(a, b),
Ordering::Less,
"Key order {} < {} failed in {}: {}",
a,
b,
node,
self[node]
);
}
// Queue up the sub-tree.
todo.push((lower, tree[i], upper));
// Set a lower bound for the next tree.
lower = upper;
}
}
NodeData::Leaf { size, keys, .. } => {
let size = size as usize;
let capacity = keys.borrow().len();
let keys = &keys.borrow()[0..size];
// Verify occupancy.
// Right-most nodes can be small, but others must be at least half full.
assert!(size > 0, "Leaf {} is empty", node);
assert!(
rkey.is_none() || size * 2 >= capacity,
"Only {}/{} entries in {}:{}, upper={}",
size,
capacity,
node,
self[node],
rkey.unwrap()
);
for i in 0..size + 1 {
let upper = keys.get(i).cloned().or(rkey);
// Check that keys are strictly monotonic.
if let (Some(a), Some(b)) = (lower, upper) {
let wanted = if i == 0 {
Ordering::Equal
} else {
Ordering::Less
};
assert_eq!(
comp.cmp(a, b),
wanted,
"Key order for {} - {} failed in {}: {}",
a,
b,
node,
self[node]
);
}
// Set a lower bound for the next key.
lower = upper;
}
}
NodeData::Free { .. } => panic!("Free {} reached", node),
}
}
}
}
impl<F: Forest> Index<Node> for NodePool<F> {
type Output = NodeData<F>;
fn index(&self, index: Node) -> &Self::Output {
self.nodes.index(index)
}
}
impl<F: Forest> IndexMut<Node> for NodePool<F> {
fn index_mut(&mut self, index: Node) -> &mut Self::Output {
self.nodes.index_mut(index)
}
}

View File

@@ -0,0 +1,598 @@
//! Forest of sets.
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
use crate::packed_option::PackedOption;
#[cfg(test)]
use core::fmt;
use core::marker::PhantomData;
#[cfg(test)]
use std::string::String;
/// Tag type defining forest types for a set.
struct SetTypes<K>(PhantomData<K>);
impl<K> Forest for SetTypes<K>
where
K: Copy,
{
type Key = K;
type Value = SetValue;
type LeafKeys = [K; 2 * INNER_SIZE - 1];
type LeafValues = [SetValue; 2 * INNER_SIZE - 1];
fn splat_key(key: Self::Key) -> Self::LeafKeys {
[key; 2 * INNER_SIZE - 1]
}
fn splat_value(value: Self::Value) -> Self::LeafValues {
[value; 2 * INNER_SIZE - 1]
}
}
/// Memory pool for a forest of `Set` instances.
pub struct SetForest<K>
where
K: Copy,
{
nodes: NodePool<SetTypes<K>>,
}
impl<K> SetForest<K>
where
K: Copy,
{
/// Create a new empty forest.
pub fn new() -> Self {
Self {
nodes: NodePool::new(),
}
}
/// Clear all sets in the forest.
///
/// All `Set` instances belong to this forest are invalidated and should no longer be used.
pub fn clear(&mut self) {
self.nodes.clear();
}
}
/// B-tree representing an ordered set of `K`s using `C` for comparing elements.
///
/// This is not a general-purpose replacement for `BTreeSet`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Sets can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a set does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Set<K>
where
K: Copy,
{
root: PackedOption<Node>,
unused: PhantomData<K>,
}
impl<K> Set<K>
where
K: Copy,
{
/// Make an empty set.
pub fn new() -> Self {
Self {
root: None.into(),
unused: PhantomData,
}
}
/// Is this an empty set?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Does the set contain `key`?.
pub fn contains<C: Comparator<K>>(&self, key: K, forest: &SetForest<K>, comp: &C) -> bool {
self.root
.expand()
.and_then(|root| Path::default().find(key, root, &forest.nodes, comp))
.is_some()
}
/// Try to insert `key` into the set.
///
/// If the set did not contain `key`, insert it and return true.
///
/// If `key` is already present, don't change the set and return false.
pub fn insert<C: Comparator<K>>(
&mut self,
key: K,
forest: &mut SetForest<K>,
comp: &C,
) -> bool {
self.cursor(forest, comp).insert(key)
}
/// Remove `key` from the set and return true.
///
/// If `key` was not present in the set, return false.
pub fn remove<C: Comparator<K>>(
&mut self,
key: K,
forest: &mut SetForest<K>,
comp: &C,
) -> bool {
let mut c = self.cursor(forest, comp);
if c.goto(key) {
c.remove();
true
} else {
false
}
}
/// Remove all entries.
pub fn clear(&mut self, forest: &mut SetForest<K>) {
if let Some(root) = self.root.take() {
forest.nodes.free_tree(root);
}
}
/// Retains only the elements specified by the predicate.
///
/// Remove all elements where the predicate returns false.
pub fn retain<F>(&mut self, forest: &mut SetForest<K>, mut predicate: F)
where
F: FnMut(K) -> bool,
{
let mut path = Path::default();
if let Some(root) = self.root.expand() {
path.first(root, &forest.nodes);
}
while let Some((node, entry)) = path.leaf_pos() {
if predicate(forest.nodes[node].unwrap_leaf().0[entry]) {
path.next(&forest.nodes);
} else {
self.root = path.remove(&mut forest.nodes).into();
}
}
}
/// Create a cursor for navigating this set. The cursor is initially positioned off the end of
/// the set.
pub fn cursor<'a, C: Comparator<K>>(
&'a mut self,
forest: &'a mut SetForest<K>,
comp: &'a C,
) -> SetCursor<'a, K, C> {
SetCursor::new(self, forest, comp)
}
/// Create an iterator traversing this set. The iterator type is `K`.
pub fn iter<'a>(&'a self, forest: &'a SetForest<K>) -> SetIter<'a, K> {
SetIter {
root: self.root,
pool: &forest.nodes,
path: Path::default(),
}
}
}
impl<K> Default for Set<K>
where
K: Copy,
{
fn default() -> Self {
Self::new()
}
}
/// A position in a `Set` used to navigate and modify the ordered set.
///
/// A cursor always points at an element in the set, or "off the end" which is a position after the
/// last element in the set.
pub struct SetCursor<'a, K, C>
where
K: 'a + Copy,
C: 'a + Comparator<K>,
{
root: &'a mut PackedOption<Node>,
pool: &'a mut NodePool<SetTypes<K>>,
comp: &'a C,
path: Path<SetTypes<K>>,
}
impl<'a, K, C> SetCursor<'a, K, C>
where
K: Copy,
C: Comparator<K>,
{
/// Create a cursor with a default (invalid) location.
fn new(container: &'a mut Set<K>, forest: &'a mut SetForest<K>, comp: &'a C) -> Self {
Self {
root: &mut container.root,
pool: &mut forest.nodes,
comp,
path: Path::default(),
}
}
/// Is this cursor pointing to an empty set?
pub fn is_empty(&self) -> bool {
self.root.is_none()
}
/// Move cursor to the next element and return it.
///
/// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
/// position.
#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
pub fn next(&mut self) -> Option<K> {
self.path.next(self.pool).map(|(k, _)| k)
}
/// Move cursor to the previous element and return it.
///
/// If the cursor is already pointing at the first element, leave it there and return `None`.
pub fn prev(&mut self) -> Option<K> {
self.root
.expand()
.and_then(|root| self.path.prev(root, self.pool).map(|(k, _)| k))
}
/// Get the current element, or `None` if the cursor is at the end.
pub fn elem(&self) -> Option<K> {
self.path
.leaf_pos()
.and_then(|(node, entry)| self.pool[node].unwrap_leaf().0.get(entry).cloned())
}
/// Move this cursor to `elem`.
///
/// If `elem` is in the set, place the cursor at `elem` and return true.
///
/// If `elem` is not in the set, place the cursor at the next larger element (or the end) and
/// return false.
pub fn goto(&mut self, elem: K) -> bool {
match self.root.expand() {
None => false,
Some(root) => {
if self.path.find(elem, root, self.pool, self.comp).is_some() {
true
} else {
self.path.normalize(self.pool);
false
}
}
}
}
/// Move this cursor to the first element.
pub fn goto_first(&mut self) -> Option<K> {
self.root.map(|root| self.path.first(root, self.pool).0)
}
/// Try to insert `elem` into the set and leave the cursor at the inserted element.
///
/// If the set did not contain `elem`, insert it and return true.
///
/// If `elem` is already present, don't change the set, place the cursor at `goto(elem)`, and
/// return false.
pub fn insert(&mut self, elem: K) -> bool {
match self.root.expand() {
None => {
let root = self.pool.alloc_node(NodeData::leaf(elem, SetValue()));
*self.root = root.into();
self.path.set_root_node(root);
true
}
Some(root) => {
// TODO: Optimize the case where `self.path` is already at the correct insert pos.
if self.path.find(elem, root, self.pool, self.comp).is_none() {
*self.root = self.path.insert(elem, SetValue(), self.pool).into();
true
} else {
false
}
}
}
}
/// Remove the current element (if any) and return it.
/// This advances the cursor to the next element after the removed one.
pub fn remove(&mut self) -> Option<K> {
let elem = self.elem();
if elem.is_some() {
*self.root = self.path.remove(self.pool).into();
}
elem
}
}
#[cfg(test)]
impl<'a, K, C> SetCursor<'a, K, C>
where
K: Copy + fmt::Display,
C: Comparator<K>,
{
fn verify(&self) {
self.path.verify(self.pool);
self.root.map(|root| self.pool.verify_tree(root, self.comp));
}
/// Get a text version of the path to the current position.
fn tpath(&self) -> String {
use std::string::ToString;
self.path.to_string()
}
}
/// An iterator visiting the elements of a `Set`.
pub struct SetIter<'a, K>
where
K: 'a + Copy,
{
root: PackedOption<Node>,
pool: &'a NodePool<SetTypes<K>>,
path: Path<SetTypes<K>>,
}
impl<'a, K> Iterator for SetIter<'a, K>
where
K: 'a + Copy,
{
type Item = K;
fn next(&mut self) -> Option<Self::Item> {
// We use `self.root` to indicate if we need to go to the first element. Reset to `None`
// once we've returned the first element. This also works for an empty tree since the
// `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
match self.root.take() {
Some(root) => Some(self.path.first(root, self.pool).0),
None => self.path.next(self.pool).map(|(k, _)| k),
}
}
}
#[cfg(test)]
mod tests {
use super::super::NodeData;
use super::*;
use core::mem;
use std::vec::Vec;
#[test]
fn node_size() {
// check that nodes are cache line sized when keys are 32 bits.
type F = SetTypes<u32>;
assert_eq!(mem::size_of::<NodeData<F>>(), 64);
}
#[test]
fn empty() {
let mut f = SetForest::<u32>::new();
f.clear();
let mut s = Set::<u32>::new();
assert!(s.is_empty());
s.clear(&mut f);
assert!(!s.contains(7, &f, &()));
// Iterator for an empty set.
assert_eq!(s.iter(&f).next(), None);
s.retain(&mut f, |_| unreachable!());
let mut c = SetCursor::new(&mut s, &mut f, &());
c.verify();
assert_eq!(c.elem(), None);
assert_eq!(c.goto_first(), None);
assert_eq!(c.tpath(), "<empty path>");
}
#[test]
fn simple_cursor() {
let mut f = SetForest::<u32>::new();
let mut s = Set::<u32>::new();
let mut c = SetCursor::new(&mut s, &mut f, &());
assert!(c.insert(50));
c.verify();
assert_eq!(c.elem(), Some(50));
assert!(c.insert(100));
c.verify();
assert_eq!(c.elem(), Some(100));
assert!(c.insert(10));
c.verify();
assert_eq!(c.elem(), Some(10));
// Basic movement.
assert_eq!(c.next(), Some(50));
assert_eq!(c.next(), Some(100));
assert_eq!(c.next(), None);
assert_eq!(c.next(), None);
assert_eq!(c.prev(), Some(100));
assert_eq!(c.prev(), Some(50));
assert_eq!(c.prev(), Some(10));
assert_eq!(c.prev(), None);
assert_eq!(c.prev(), None);
assert!(c.goto(50));
assert_eq!(c.elem(), Some(50));
assert_eq!(c.remove(), Some(50));
c.verify();
assert_eq!(c.elem(), Some(100));
assert_eq!(c.remove(), Some(100));
c.verify();
assert_eq!(c.elem(), None);
assert_eq!(c.remove(), None);
c.verify();
}
#[test]
fn two_level_sparse_tree() {
let mut f = SetForest::<u32>::new();
let mut s = Set::<u32>::new();
let mut c = SetCursor::new(&mut s, &mut f, &());
// Insert enough elements that we get a two-level tree.
// Each leaf node holds 8 elements
assert!(c.is_empty());
for i in 0..50 {
assert!(c.insert(i));
assert_eq!(c.elem(), Some(i));
}
assert!(!c.is_empty());
assert_eq!(c.goto_first(), Some(0));
assert_eq!(c.tpath(), "node2[0]--node0[0]");
assert_eq!(c.prev(), None);
for i in 1..50 {
assert_eq!(c.next(), Some(i));
}
assert_eq!(c.next(), None);
for i in (0..50).rev() {
assert_eq!(c.prev(), Some(i));
}
assert_eq!(c.prev(), None);
assert!(c.goto(25));
for i in 25..50 {
assert_eq!(c.remove(), Some(i));
assert!(!c.is_empty());
c.verify();
}
for i in (0..25).rev() {
assert!(!c.is_empty());
assert_eq!(c.elem(), None);
assert_eq!(c.prev(), Some(i));
assert_eq!(c.remove(), Some(i));
c.verify();
}
assert_eq!(c.elem(), None);
assert!(c.is_empty());
}
#[test]
fn three_level_sparse_tree() {
let mut f = SetForest::<u32>::new();
let mut s = Set::<u32>::new();
let mut c = SetCursor::new(&mut s, &mut f, &());
// Insert enough elements that we get a 3-level tree.
// Each leaf node holds 8 elements when filled up sequentially.
// Inner nodes hold 8 node pointers.
assert!(c.is_empty());
for i in 0..150 {
assert!(c.insert(i));
assert_eq!(c.elem(), Some(i));
}
assert!(!c.is_empty());
assert!(c.goto(0));
assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]");
assert_eq!(c.prev(), None);
for i in 1..150 {
assert_eq!(c.next(), Some(i));
}
assert_eq!(c.next(), None);
for i in (0..150).rev() {
assert_eq!(c.prev(), Some(i));
}
assert_eq!(c.prev(), None);
assert!(c.goto(125));
for i in 125..150 {
assert_eq!(c.remove(), Some(i));
assert!(!c.is_empty());
c.verify();
}
for i in (0..125).rev() {
assert!(!c.is_empty());
assert_eq!(c.elem(), None);
assert_eq!(c.prev(), Some(i));
assert_eq!(c.remove(), Some(i));
c.verify();
}
assert_eq!(c.elem(), None);
assert!(c.is_empty());
}
// Generate a densely populated 4-level tree.
//
// Level 1: 1 root
// Level 2: 8 inner
// Level 3: 64 inner
// Level 4: 512 leafs, up to 7680 elements
//
// A 3-level tree can hold at most 960 elements.
fn dense4l(f: &mut SetForest<i32>) -> Set<i32> {
f.clear();
let mut s = Set::new();
// Insert 400 elements in 7 passes over the range to avoid the half-full leaf node pattern
// that comes from sequential insertion. This will generate a normal leaf layer.
for n in 0..4000 {
assert!(s.insert((n * 7) % 4000, f, &()));
}
s
}
#[test]
fn four_level() {
let mut f = SetForest::<i32>::new();
let mut s = dense4l(&mut f);
assert_eq!(
s.iter(&f).collect::<Vec<_>>()[0..10],
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
);
let mut c = s.cursor(&mut f, &());
c.verify();
// Peel off a whole sub-tree of the root by deleting from the front.
// The 900 element is near the front of the second sub-tree.
assert!(c.goto(900));
assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]");
assert!(c.goto(0));
for i in 0..900 {
assert!(!c.is_empty());
assert_eq!(c.remove(), Some(i));
}
c.verify();
assert_eq!(c.elem(), Some(900));
// Delete backwards from somewhere in the middle.
assert!(c.goto(3000));
for i in (2000..3000).rev() {
assert_eq!(c.prev(), Some(i));
assert_eq!(c.remove(), Some(i));
assert_eq!(c.elem(), Some(3000));
}
c.verify();
// Remove everything in a scattered manner, triggering many collapsing patterns.
for i in 0..4000 {
if c.goto((i * 7) % 4000) {
c.remove();
}
}
assert!(c.is_empty());
}
#[test]
fn four_level_clear() {
let mut f = SetForest::<i32>::new();
let mut s = dense4l(&mut f);
s.clear(&mut f);
}
}