* egraphs: a few miscellaneous compile-time optimizations. These optimizations together are worth about a 2% compile-time reduction, as measured on one core with spidermonkey.wasm as an input, using `hyperfine` on `wasmtime compile`. The changes included are: - Some better pre-allocation (blockparams and side-effects concatenated list vecs); - Avoiding the indirection of storing list-of-types for every Pure and Inst node, when almost all nodes produce only a single result; instead, store arity and single type if it exists, and allow result projection nodes to fill in types otherwise; - Pack the `MemoryState` enum into one `u32` (this together with the above removal of the type slice allows `Node` to shrink from 48 bytes to 32 bytes); - always-inline an accessor (`entry` on `CtxHash`) that wasn't (`always(inline)` appears to be load-bearing, rather than just `inline`); - Split the update-analysis path into two hotpaths, one for the union case and one for the new-node case (and the former can avoid recomputing for the contained node when replacing a node with node-and-child eclass entry). * Review feedback. * Fix test build. * Fix to lowering when unused output with invalid type is present.
282 lines
8.2 KiB
Rust
282 lines
8.2 KiB
Rust
//! A hashmap with "external hashing": nodes are hashed or compared for
|
|
//! equality only with some external context provided on lookup/insert.
|
|
//! This allows very memory-efficient data structures where
|
|
//! node-internal data references some other storage (e.g., offsets into
|
|
//! an array or pool of shared data).
|
|
|
|
use super::unionfind::UnionFind;
|
|
use hashbrown::raw::{Bucket, RawTable};
|
|
use std::hash::{Hash, Hasher};
|
|
use std::marker::PhantomData;
|
|
|
|
/// Trait that allows for equality comparison given some external
|
|
/// context.
|
|
///
|
|
/// Note that this trait is implemented by the *context*, rather than
|
|
/// the item type, for somewhat complex lifetime reasons (lack of GATs
|
|
/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
|
|
/// the value type).
|
|
///
|
|
/// Furthermore, the `ctx_eq` method includes a `UnionFind` parameter,
|
|
/// because in practice we require this and a borrow to it cannot be
|
|
/// included in the context type without GATs (similarly to above).
|
|
pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
|
|
/// Determine whether `a` and `b` are equal, given the context in
|
|
/// `self` and the union-find data structure `uf`.
|
|
fn ctx_eq(&self, a: &V1, b: &V2, uf: &mut UnionFind) -> bool;
|
|
}
|
|
|
|
/// Trait that allows for hashing given some external context.
|
|
pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
|
|
/// Compute the hash of `value`, given the context in `self` and
|
|
/// the union-find data structure `uf`.
|
|
fn ctx_hash(&self, value: &Value, uf: &mut UnionFind) -> u64;
|
|
}
|
|
|
|
/// A null-comparator context type for underlying value types that
|
|
/// already have `Eq` and `Hash`.
|
|
#[derive(Default)]
|
|
pub struct NullCtx;
|
|
|
|
impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
|
|
fn ctx_eq(&self, a: &V, b: &V, _: &mut UnionFind) -> bool {
|
|
a.eq(b)
|
|
}
|
|
}
|
|
impl<V: Eq + Hash> CtxHash<V> for NullCtx {
|
|
fn ctx_hash(&self, value: &V, _: &mut UnionFind) -> u64 {
|
|
let mut state = fxhash::FxHasher::default();
|
|
value.hash(&mut state);
|
|
state.finish()
|
|
}
|
|
}
|
|
|
|
/// A bucket in the hash table.
|
|
///
|
|
/// Some performance-related design notes: we cache the hashcode for
|
|
/// speed, as this often buys a few percent speed in
|
|
/// interning-table-heavy workloads. We only keep the low 32 bits of
|
|
/// the hashcode, for memory efficiency: in common use, `K` and `V`
|
|
/// are often 32 bits also, and a 12-byte bucket is measurably better
|
|
/// than a 16-byte bucket.
|
|
struct BucketData<K, V> {
|
|
hash: u32,
|
|
k: K,
|
|
v: V,
|
|
}
|
|
|
|
/// A HashMap that takes external context for all operations.
|
|
pub struct CtxHashMap<K, V> {
|
|
raw: RawTable<BucketData<K, V>>,
|
|
}
|
|
|
|
impl<K, V> CtxHashMap<K, V> {
|
|
/// Create an empty hashmap.
|
|
pub fn new() -> Self {
|
|
Self {
|
|
raw: RawTable::new(),
|
|
}
|
|
}
|
|
|
|
/// Create an empty hashmap with pre-allocated space for the given
|
|
/// capacity.
|
|
pub fn with_capacity(capacity: usize) -> Self {
|
|
Self {
|
|
raw: RawTable::with_capacity(capacity),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<K, V> CtxHashMap<K, V> {
|
|
/// Insert a new key-value pair, returning the old value associated
|
|
/// with this key (if any).
|
|
pub fn insert<Ctx: CtxEq<K, K> + CtxHash<K>>(
|
|
&mut self,
|
|
k: K,
|
|
v: V,
|
|
ctx: &Ctx,
|
|
uf: &mut UnionFind,
|
|
) -> Option<V> {
|
|
let hash = ctx.ctx_hash(&k, uf) as u32;
|
|
match self.raw.find(hash as u64, |bucket| {
|
|
hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
|
|
}) {
|
|
Some(bucket) => {
|
|
let data = unsafe { bucket.as_mut() };
|
|
Some(std::mem::replace(&mut data.v, v))
|
|
}
|
|
None => {
|
|
let data = BucketData { hash, k, v };
|
|
self.raw
|
|
.insert_entry(hash as u64, data, |bucket| bucket.hash as u64);
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Look up a key, returning a borrow of the value if present.
|
|
pub fn get<'a, Q, Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>>(
|
|
&'a self,
|
|
k: &Q,
|
|
ctx: &Ctx,
|
|
uf: &mut UnionFind,
|
|
) -> Option<&'a V> {
|
|
let hash = ctx.ctx_hash(k, uf) as u32;
|
|
self.raw
|
|
.find(hash as u64, |bucket| {
|
|
hash == bucket.hash && ctx.ctx_eq(&bucket.k, k, uf)
|
|
})
|
|
.map(|bucket| {
|
|
let data = unsafe { bucket.as_ref() };
|
|
&data.v
|
|
})
|
|
}
|
|
|
|
/// Return an Entry cursor on a given bucket for a key, allowing
|
|
/// for fetching the current value or inserting a new one.
|
|
#[inline(always)]
|
|
pub fn entry<'a, Ctx: CtxEq<K, K> + CtxHash<K>>(
|
|
&'a mut self,
|
|
k: K,
|
|
ctx: &'a Ctx,
|
|
uf: &mut UnionFind,
|
|
) -> Entry<'a, K, V> {
|
|
let hash = ctx.ctx_hash(&k, uf) as u32;
|
|
match self.raw.find(hash as u64, |bucket| {
|
|
hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
|
|
}) {
|
|
Some(bucket) => Entry::Occupied(OccupiedEntry {
|
|
bucket,
|
|
_phantom: PhantomData,
|
|
}),
|
|
None => Entry::Vacant(VacantEntry {
|
|
raw: &mut self.raw,
|
|
hash,
|
|
key: k,
|
|
}),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// An entry in the hashmap.
|
|
pub enum Entry<'a, K: 'a, V> {
|
|
Occupied(OccupiedEntry<'a, K, V>),
|
|
Vacant(VacantEntry<'a, K, V>),
|
|
}
|
|
|
|
/// An occupied entry.
|
|
pub struct OccupiedEntry<'a, K, V> {
|
|
bucket: Bucket<BucketData<K, V>>,
|
|
_phantom: PhantomData<&'a ()>,
|
|
}
|
|
|
|
impl<'a, K: 'a, V> OccupiedEntry<'a, K, V> {
|
|
/// Get the value.
|
|
pub fn get(&self) -> &'a V {
|
|
let bucket = unsafe { self.bucket.as_ref() };
|
|
&bucket.v
|
|
}
|
|
}
|
|
|
|
/// A vacant entry.
|
|
pub struct VacantEntry<'a, K, V> {
|
|
raw: &'a mut RawTable<BucketData<K, V>>,
|
|
hash: u32,
|
|
key: K,
|
|
}
|
|
|
|
impl<'a, K, V> VacantEntry<'a, K, V> {
|
|
/// Insert a value.
|
|
pub fn insert(self, v: V) -> &'a V {
|
|
let bucket = self.raw.insert(
|
|
self.hash as u64,
|
|
BucketData {
|
|
hash: self.hash,
|
|
k: self.key,
|
|
v,
|
|
},
|
|
|bucket| bucket.hash as u64,
|
|
);
|
|
let data = unsafe { bucket.as_ref() };
|
|
&data.v
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod test {
|
|
use super::*;
|
|
use std::hash::Hash;
|
|
|
|
#[derive(Clone, Copy, Debug)]
|
|
struct Key {
|
|
index: u32,
|
|
}
|
|
struct Ctx {
|
|
vals: &'static [&'static str],
|
|
}
|
|
impl CtxEq<Key, Key> for Ctx {
|
|
fn ctx_eq(&self, a: &Key, b: &Key, _: &mut UnionFind) -> bool {
|
|
self.vals[a.index as usize].eq(self.vals[b.index as usize])
|
|
}
|
|
}
|
|
impl CtxHash<Key> for Ctx {
|
|
fn ctx_hash(&self, value: &Key, _: &mut UnionFind) -> u64 {
|
|
let mut state = fxhash::FxHasher::default();
|
|
self.vals[value.index as usize].hash(&mut state);
|
|
state.finish()
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_basic() {
|
|
let ctx = Ctx {
|
|
vals: &["a", "b", "a"],
|
|
};
|
|
let mut uf = UnionFind::new();
|
|
|
|
let k0 = Key { index: 0 };
|
|
let k1 = Key { index: 1 };
|
|
let k2 = Key { index: 2 };
|
|
|
|
assert!(ctx.ctx_eq(&k0, &k2, &mut uf));
|
|
assert!(!ctx.ctx_eq(&k0, &k1, &mut uf));
|
|
assert!(!ctx.ctx_eq(&k2, &k1, &mut uf));
|
|
|
|
let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
|
|
assert_eq!(map.insert(k0, 42, &ctx, &mut uf), None);
|
|
assert_eq!(map.insert(k2, 84, &ctx, &mut uf), Some(42));
|
|
assert_eq!(map.get(&k1, &ctx, &mut uf), None);
|
|
assert_eq!(*map.get(&k0, &ctx, &mut uf).unwrap(), 84);
|
|
}
|
|
|
|
#[test]
|
|
fn test_entry() {
|
|
let mut ctx = Ctx {
|
|
vals: &["a", "b", "a"],
|
|
};
|
|
let mut uf = UnionFind::new();
|
|
|
|
let k0 = Key { index: 0 };
|
|
let k1 = Key { index: 1 };
|
|
let k2 = Key { index: 2 };
|
|
|
|
let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
|
|
match map.entry(k0, &mut ctx, &mut uf) {
|
|
Entry::Vacant(v) => {
|
|
v.insert(1);
|
|
}
|
|
_ => panic!(),
|
|
}
|
|
match map.entry(k1, &mut ctx, &mut uf) {
|
|
Entry::Vacant(_) => {}
|
|
Entry::Occupied(_) => panic!(),
|
|
}
|
|
match map.entry(k2, &mut ctx, &mut uf) {
|
|
Entry::Occupied(o) => {
|
|
assert_eq!(*o.get(), 1);
|
|
}
|
|
_ => panic!(),
|
|
}
|
|
}
|
|
}
|