//! A hashmap with "external hashing": nodes are hashed or compared for //! equality only with some external context provided on lookup/insert. //! This allows very memory-efficient data structures where //! node-internal data references some other storage (e.g., offsets into //! an array or pool of shared data). use super::unionfind::UnionFind; use hashbrown::raw::{Bucket, RawTable}; use std::hash::{Hash, Hasher}; use std::marker::PhantomData; /// Trait that allows for equality comparison given some external /// context. /// /// Note that this trait is implemented by the *context*, rather than /// the item type, for somewhat complex lifetime reasons (lack of GATs /// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on /// the value type). /// /// Furthermore, the `ctx_eq` method includes a `UnionFind` parameter, /// because in practice we require this and a borrow to it cannot be /// included in the context type without GATs (similarly to above). pub trait CtxEq { /// Determine whether `a` and `b` are equal, given the context in /// `self` and the union-find data structure `uf`. fn ctx_eq(&self, a: &V1, b: &V2, uf: &mut UnionFind) -> bool; } /// Trait that allows for hashing given some external context. pub trait CtxHash: CtxEq { /// Compute the hash of `value`, given the context in `self` and /// the union-find data structure `uf`. fn ctx_hash(&self, value: &Value, uf: &mut UnionFind) -> u64; } /// A null-comparator context type for underlying value types that /// already have `Eq` and `Hash`. #[derive(Default)] pub struct NullCtx; impl CtxEq for NullCtx { fn ctx_eq(&self, a: &V, b: &V, _: &mut UnionFind) -> bool { a.eq(b) } } impl CtxHash for NullCtx { fn ctx_hash(&self, value: &V, _: &mut UnionFind) -> u64 { let mut state = fxhash::FxHasher::default(); value.hash(&mut state); state.finish() } } /// A bucket in the hash table. /// /// Some performance-related design notes: we cache the hashcode for /// speed, as this often buys a few percent speed in /// interning-table-heavy workloads. We only keep the low 32 bits of /// the hashcode, for memory efficiency: in common use, `K` and `V` /// are often 32 bits also, and a 12-byte bucket is measurably better /// than a 16-byte bucket. struct BucketData { hash: u32, k: K, v: V, } /// A HashMap that takes external context for all operations. pub struct CtxHashMap { raw: RawTable>, } impl CtxHashMap { /// Create an empty hashmap. pub fn new() -> Self { Self { raw: RawTable::new(), } } /// Create an empty hashmap with pre-allocated space for the given /// capacity. pub fn with_capacity(capacity: usize) -> Self { Self { raw: RawTable::with_capacity(capacity), } } } impl CtxHashMap { /// Insert a new key-value pair, returning the old value associated /// with this key (if any). pub fn insert + CtxHash>( &mut self, k: K, v: V, ctx: &Ctx, uf: &mut UnionFind, ) -> Option { let hash = ctx.ctx_hash(&k, uf) as u32; match self.raw.find(hash as u64, |bucket| { hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf) }) { Some(bucket) => { let data = unsafe { bucket.as_mut() }; Some(std::mem::replace(&mut data.v, v)) } None => { let data = BucketData { hash, k, v }; self.raw .insert_entry(hash as u64, data, |bucket| bucket.hash as u64); None } } } /// Look up a key, returning a borrow of the value if present. pub fn get<'a, Q, Ctx: CtxEq + CtxHash + CtxHash>( &'a self, k: &Q, ctx: &Ctx, uf: &mut UnionFind, ) -> Option<&'a V> { let hash = ctx.ctx_hash(k, uf) as u32; self.raw .find(hash as u64, |bucket| { hash == bucket.hash && ctx.ctx_eq(&bucket.k, k, uf) }) .map(|bucket| { let data = unsafe { bucket.as_ref() }; &data.v }) } /// Return an Entry cursor on a given bucket for a key, allowing /// for fetching the current value or inserting a new one. #[inline(always)] pub fn entry<'a, Ctx: CtxEq + CtxHash>( &'a mut self, k: K, ctx: &'a Ctx, uf: &mut UnionFind, ) -> Entry<'a, K, V> { let hash = ctx.ctx_hash(&k, uf) as u32; match self.raw.find(hash as u64, |bucket| { hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf) }) { Some(bucket) => Entry::Occupied(OccupiedEntry { bucket, _phantom: PhantomData, }), None => Entry::Vacant(VacantEntry { raw: &mut self.raw, hash, key: k, }), } } } /// An entry in the hashmap. pub enum Entry<'a, K: 'a, V> { Occupied(OccupiedEntry<'a, K, V>), Vacant(VacantEntry<'a, K, V>), } /// An occupied entry. pub struct OccupiedEntry<'a, K, V> { bucket: Bucket>, _phantom: PhantomData<&'a ()>, } impl<'a, K: 'a, V> OccupiedEntry<'a, K, V> { /// Get the value. pub fn get(&self) -> &'a V { let bucket = unsafe { self.bucket.as_ref() }; &bucket.v } } /// A vacant entry. pub struct VacantEntry<'a, K, V> { raw: &'a mut RawTable>, hash: u32, key: K, } impl<'a, K, V> VacantEntry<'a, K, V> { /// Insert a value. pub fn insert(self, v: V) -> &'a V { let bucket = self.raw.insert( self.hash as u64, BucketData { hash: self.hash, k: self.key, v, }, |bucket| bucket.hash as u64, ); let data = unsafe { bucket.as_ref() }; &data.v } } #[cfg(test)] mod test { use super::*; use std::hash::Hash; #[derive(Clone, Copy, Debug)] struct Key { index: u32, } struct Ctx { vals: &'static [&'static str], } impl CtxEq for Ctx { fn ctx_eq(&self, a: &Key, b: &Key, _: &mut UnionFind) -> bool { self.vals[a.index as usize].eq(self.vals[b.index as usize]) } } impl CtxHash for Ctx { fn ctx_hash(&self, value: &Key, _: &mut UnionFind) -> u64 { let mut state = fxhash::FxHasher::default(); self.vals[value.index as usize].hash(&mut state); state.finish() } } #[test] fn test_basic() { let ctx = Ctx { vals: &["a", "b", "a"], }; let mut uf = UnionFind::new(); let k0 = Key { index: 0 }; let k1 = Key { index: 1 }; let k2 = Key { index: 2 }; assert!(ctx.ctx_eq(&k0, &k2, &mut uf)); assert!(!ctx.ctx_eq(&k0, &k1, &mut uf)); assert!(!ctx.ctx_eq(&k2, &k1, &mut uf)); let mut map: CtxHashMap = CtxHashMap::new(); assert_eq!(map.insert(k0, 42, &ctx, &mut uf), None); assert_eq!(map.insert(k2, 84, &ctx, &mut uf), Some(42)); assert_eq!(map.get(&k1, &ctx, &mut uf), None); assert_eq!(*map.get(&k0, &ctx, &mut uf).unwrap(), 84); } #[test] fn test_entry() { let mut ctx = Ctx { vals: &["a", "b", "a"], }; let mut uf = UnionFind::new(); let k0 = Key { index: 0 }; let k1 = Key { index: 1 }; let k2 = Key { index: 2 }; let mut map: CtxHashMap = CtxHashMap::new(); match map.entry(k0, &mut ctx, &mut uf) { Entry::Vacant(v) => { v.insert(1); } _ => panic!(), } match map.entry(k1, &mut ctx, &mut uf) { Entry::Vacant(_) => {} Entry::Occupied(_) => panic!(), } match map.entry(k2, &mut ctx, &mut uf) { Entry::Occupied(o) => { assert_eq!(*o.get(), 1); } _ => panic!(), } } }