diff --git a/Cargo.lock b/Cargo.lock
index 597d049030..c53c263e93 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -557,6 +557,18 @@ dependencies = [
 name = "cranelift-codegen-shared"
 version = "0.89.0"
 
+[[package]]
+name = "cranelift-egraph"
+version = "0.89.0"
+dependencies = [
+ "cranelift-entity",
+ "fxhash",
+ "hashbrown",
+ "indexmap",
+ "log",
+ "smallvec",
+]
+
 [[package]]
 name = "cranelift-entity"
 version = "0.89.0"
@@ -1328,9 +1340,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
 [[package]]
 name = "hashbrown"
-version = "0.12.1"
+version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 dependencies = [
  "ahash",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 764be58102..9c6d4d8fff 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -77,6 +77,7 @@ opt-level = 0
 resolver = '2'
 members = [
   "cranelift",
+  "cranelift/egraph",
   "cranelift/isle/fuzz",
   "cranelift/isle/islec",
   "cranelift/serde",
diff --git a/cranelift/egraph/Cargo.toml b/cranelift/egraph/Cargo.toml
new file mode 100644
index 0000000000..819ad25159
--- /dev/null
+++ b/cranelift/egraph/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+authors = ["The Cranelift Project Developers"]
+name = "cranelift-egraph"
+version = "0.89.0"
+description = "acyclic-egraph (aegraph) implementation for Cranelift"
+license = "Apache-2.0 WITH LLVM-exception"
+documentation = "https://docs.rs/cranelift-egraph"
+repository = "https://github.com/bytecodealliance/wasmtime"
+edition = "2021"
+
+[dependencies]
+cranelift-entity = { path = "../entity", version = "0.89.0" }
+log = { version = "0.4.6", default-features = false }
+smallvec = { version = "1.6.1" }
+indexmap = { version = "1.9.1" }
+hashbrown = { version = "0.12.2", features = ["raw"] }
+fxhash = "0.2.1"
+
+[features]
+default = []
+
+# Enable detailed trace-level debug logging. Excluded by default to
+# avoid the dynamic overhead of checking the logging level.
+trace-log = []
\ No newline at end of file
diff --git a/cranelift/egraph/src/bumpvec.rs b/cranelift/egraph/src/bumpvec.rs
new file mode 100644
index 0000000000..7c8d210cb9
--- /dev/null
+++ b/cranelift/egraph/src/bumpvec.rs
@@ -0,0 +1,524 @@
+//! Vectors allocated in arenas, with small per-vector overhead.
+
+use std::marker::PhantomData;
+use std::mem::MaybeUninit;
+use std::ops::Range;
+
+/// A vector of `T` stored within a `BumpArena`.
+///
+/// This is something like a normal `Vec`, except that all accesses
+/// and updates require a separate borrow of the `BumpArena`. This, in
+/// turn, makes the Vec itself very compact: only three `u32`s (12
+/// bytes). The `BumpSlice` variant is only two `u32`s (8 bytes) and
+/// is sufficient to reconstruct a slice, but not grow the vector.
+///
+/// The `BumpVec` does *not* implement `Clone` or `Copy`; it
+/// represents unique ownership of a range of indices in the arena. If
+/// dropped, those indices will be unavailable until the arena is
+/// freed. This is "fine" (it is normally how arena allocation
+/// works). To explicitly free a vector's storage and make it
+/// available to subsequent allocations, a very rudimentary reuse
+/// mechanism exists via `BumpVec::free(arena)`. (The allocation path
+/// opportunistically checks the last range on the freelist, and can
+/// carve off a piece of it if larger than needed, but it does not
+/// attempt to traverse the entire freelist; this is a compromise
+/// between bump-allocation speed and memory efficiency, which also
+/// helps speed, by reusing memory that is likely still in cache.)
+///
+/// The type `T` should not have a `Drop` implementation. This
+/// typically means that it does not own any boxed memory,
+/// sub-collections, or other resources. This is important for the
+/// efficiency of the data structure (otherwise, to call `Drop` impls,
+/// the arena needs to track which indices are live or dead; the
+/// BumpVec itself cannot do the drop because it does not retain a
+/// reference to the arena). Note that placing a `T` with a `Drop`
+/// impl in the arena is still *safe*, because leaking (that is, never
+/// calling `Drop::drop()`) is safe. It is merely less efficient, and
+/// so should be avoided if possible.
+#[derive(Debug)]
+pub struct BumpVec<T> {
+    base: u32,
+    len: u32,
+    cap: u32,
+    _phantom: PhantomData<T>,
+}
+
+/// A slice in an arena: like a `BumpVec`, but has a fixed size that
+/// cannot grow. The size of this struct is one 32-bit word smaller
+/// than `BumpVec`. It is copyable/cloneable because it will never be
+/// freed.
+#[derive(Debug, Clone, Copy)]
+pub struct BumpSlice<T> {
+    base: u32,
+    len: u32,
+    _phantom: PhantomData<T>,
+}
+
+#[derive(Default)]
+pub struct BumpArena<T> {
+    vec: Vec<MaybeUninit<T>>,
+    freelist: Vec<Range<u32>>,
+}
+
+impl<T> BumpArena<T> {
+    /// Create a new arena into which one can allocate `BumpVec`s.
+    pub fn new() -> Self {
+        Self {
+            vec: vec![],
+            freelist: vec![],
+        }
+    }
+
+    /// Create a new arena, pre-allocating space for `cap` total `T`
+    /// elements.
+    pub fn arena_with_capacity(cap: usize) -> Self {
+        Self {
+            vec: Vec::with_capacity(cap),
+            freelist: Vec::with_capacity(cap / 16),
+        }
+    }
+
+    /// Create a new `BumpVec` with the given pre-allocated capacity
+    /// and zero length.
+    pub fn vec_with_capacity(&mut self, cap: usize) -> BumpVec<T> {
+        let cap = u32::try_from(cap).unwrap();
+        if let Some(range) = self.maybe_freelist_alloc(cap) {
+            BumpVec {
+                base: range.start,
+                len: 0,
+                cap,
+                _phantom: PhantomData,
+            }
+        } else {
+            let base = self.vec.len() as u32;
+            for _ in 0..cap {
+                self.vec.push(MaybeUninit::uninit());
+            }
+            BumpVec {
+                base,
+                len: 0,
+                cap,
+                _phantom: PhantomData,
+            }
+        }
+    }
+
+    /// Create a new `BumpVec` with a single element. The capacity is
+    /// also only one element; growing the vector further will require
+    /// a reallocation.
+    pub fn single(&mut self, t: T) -> BumpVec<T> {
+        let mut vec = self.vec_with_capacity(1);
+        unsafe {
+            self.write_into_index(vec.base, t);
+        }
+        vec.len = 1;
+        vec
+    }
+
+    /// Create a new `BumpVec` with the sequence from an iterator.
+    pub fn from_iter<I: Iterator<Item = T>>(&mut self, i: I) -> BumpVec<T> {
+        let base = self.vec.len() as u32;
+        self.vec.extend(i.map(|item| MaybeUninit::new(item)));
+        let len = self.vec.len() as u32 - base;
+        BumpVec {
+            base,
+            len,
+            cap: len,
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Append two `BumpVec`s, returning a new one. Consumes both
+    /// vectors. This will use the capacity at the end of `a` if
+    /// possible to move `b`'s elements into place; otherwise it will
+    /// need to allocate new space.
+    pub fn append(&mut self, a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
+        if (a.cap - a.len) >= b.len {
+            self.append_into_cap(a, b)
+        } else {
+            self.append_into_new(a, b)
+        }
+    }
+
+    /// Helper: read the `T` out of a given arena index. After
+    /// reading, that index becomes uninitialized.
+    unsafe fn read_out_of_index(&self, index: u32) -> T {
+        // Note that we don't actually *track* uninitialized status
+        // (and this is fine because we will never `Drop` and we never
+        // allow a `BumpVec` to refer to an uninitialized index, so
+        // the bits are effectively dead). We simply read the bits out
+        // and return them.
+        self.vec[index as usize].as_ptr().read()
+    }
+
+    /// Helper: write a `T` into the given arena index. Index must
+    /// have been uninitialized previously.
+    unsafe fn write_into_index(&mut self, index: u32, t: T) {
+        self.vec[index as usize].as_mut_ptr().write(t);
+    }
+
+    /// Helper: move a `T` from one index to another. The old index
+    /// becomes uninitialized and the new index must have previously
+    /// been uninitialized.
+    unsafe fn move_item(&mut self, from: u32, to: u32) {
+        let item = self.read_out_of_index(from);
+        self.write_into_index(to, item);
+    }
+
+    /// Helper: push a `T` onto the end of the arena, growing its
+    /// storage. The `T` to push is read out of another index, and
+    /// that index subsequently becomes uninitialized.
+    unsafe fn push_item(&mut self, from: u32) -> u32 {
+        let index = self.vec.len() as u32;
+        let item = self.read_out_of_index(from);
+        self.vec.push(MaybeUninit::new(item));
+        index
+    }
+
+    /// Helper: append `b` into the capacity at the end of `a`.
+    fn append_into_cap(&mut self, mut a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
+        debug_assert!(a.cap - a.len >= b.len);
+        for i in 0..b.len {
+            // Safety: initially, the indices in `b` are initialized;
+            // the indices in `a`'s cap, beyond its length, are
+            // uninitialized. We move the initialized contents from
+            // `b` to the tail beyond `a`, and we consume `b` (so it
+            // no longer exists), and we update `a`'s length to cover
+            // the initialized contents in their new location.
+            unsafe {
+                self.move_item(b.base + i, a.base + a.len + i);
+            }
+        }
+        a.len += b.len;
+        b.free(self);
+        a
+    }
+
+    /// Helper: return a range of indices that are available
+    /// (uninitialized) according to the freelist for `len` elements,
+    /// if possible.
+    fn maybe_freelist_alloc(&mut self, len: u32) -> Option<Range<u32>> {
+        if let Some(entry) = self.freelist.last_mut() {
+            if entry.len() >= len as usize {
+                let base = entry.start;
+                entry.start += len;
+                if entry.start == entry.end {
+                    self.freelist.pop();
+                }
+                return Some(base..(base + len));
+            }
+        }
+        None
+    }
+
+    /// Helper: append `a` and `b` into a completely new allocation.
+    fn append_into_new(&mut self, a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
+        // New capacity: round up to a power of two.
+        let len = a.len + b.len;
+        let cap = round_up_power_of_two(len);
+
+        if let Some(range) = self.maybe_freelist_alloc(cap) {
+            for i in 0..a.len {
+                // Safety: the indices in `a` must be initialized. We read
+                // out the item and copy it to a new index; the old index
+                // is no longer covered by a BumpVec, because we consume
+                // `a`.
+                unsafe {
+                    self.move_item(a.base + i, range.start + i);
+                }
+            }
+            for i in 0..b.len {
+                // Safety: the indices in `b` must be initialized. We read
+                // out the item and copy it to a new index; the old index
+                // is no longer covered by a BumpVec, because we consume
+                // `b`.
+                unsafe {
+                    self.move_item(b.base + i, range.start + a.len + i);
+                }
+            }
+
+            a.free(self);
+            b.free(self);
+
+            BumpVec {
+                base: range.start,
+                len,
+                cap,
+                _phantom: PhantomData,
+            }
+        } else {
+            self.vec.reserve(cap as usize);
+            let base = self.vec.len() as u32;
+            for i in 0..a.len {
+                // Safety: the indices in `a` must be initialized. We read
+                // out the item and copy it to a new index; the old index
+                // is no longer covered by a BumpVec, because we consume
+                // `a`.
+                unsafe {
+                    self.push_item(a.base + i);
+                }
+            }
+            for i in 0..b.len {
+                // Safety: the indices in `b` must be initialized. We read
+                // out the item and copy it to a new index; the old index
+                // is no longer covered by a BumpVec, because we consume
+                // `b`.
+                unsafe {
+                    self.push_item(b.base + i);
+                }
+            }
+            let len = self.vec.len() as u32 - base;
+
+            for _ in len..cap {
+                self.vec.push(MaybeUninit::uninit());
+            }
+
+            a.free(self);
+            b.free(self);
+
+            BumpVec {
+                base,
+                len,
+                cap,
+                _phantom: PhantomData,
+            }
+        }
+    }
+
+    /// Returns the size of the backing `Vec`.
+    pub fn size(&self) -> usize {
+        self.vec.len()
+    }
+}
+
+fn round_up_power_of_two(x: u32) -> u32 {
+    debug_assert!(x > 0);
+    debug_assert!(x < 0x8000_0000);
+    let log2 = 32 - (x - 1).leading_zeros();
+    1 << log2
+}
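+
+// Worked example of the arithmetic above: for `x = 24`, `x - 1 = 23`
+// is `0b10111`, which has 27 leading zeros as a `u32`, so
+// `log2 = 32 - 27 = 5` and the result is `1 << 5 = 32`. Exact powers
+// of two are preserved: for `x = 4`, `x - 1 = 0b11` has 30 leading
+// zeros, so the result is `1 << 2 = 4` again.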
+
+impl<T> BumpVec<T> {
+    /// Returns a slice view of this `BumpVec`, given a borrow of the
+    /// arena.
+    pub fn as_slice<'a>(&'a self, arena: &'a BumpArena<T>) -> &'a [T] {
+        let maybe_uninit_slice =
+            &arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
+        // Safety: the index range we represent must be initialized.
+        unsafe { std::mem::transmute(maybe_uninit_slice) }
+    }
+
+    /// Returns a mutable slice view of this `BumpVec`, given a
+    /// mutable borrow of the arena.
+    pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena<T>) -> &'a mut [T] {
+        let maybe_uninit_slice =
+            &mut arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
+        // Safety: the index range we represent must be initialized.
+        unsafe { std::mem::transmute(maybe_uninit_slice) }
+    }
+
+    /// Returns the length of this vector. Does not require access to
+    /// the arena.
+    pub fn len(&self) -> usize {
+        self.len as usize
+    }
+
+    /// Returns the capacity of this vector. Does not require access
+    /// to the arena.
+    pub fn cap(&self) -> usize {
+        self.cap as usize
+    }
+
+    /// Reserve `extra_len` capacity at the end of the vector,
+    /// reallocating if necessary.
+    pub fn reserve(&mut self, extra_len: usize, arena: &mut BumpArena<T>) {
+        let extra_len = u32::try_from(extra_len).unwrap();
+        if self.cap - self.len < extra_len {
+            if self.base + self.cap == arena.vec.len() as u32 {
+                for _ in 0..extra_len {
+                    arena.vec.push(MaybeUninit::uninit());
+                }
+                self.cap += extra_len;
+            } else {
+                let new_cap = self.cap + extra_len;
+                let new = arena.vec_with_capacity(new_cap as usize);
+                unsafe {
+                    for i in 0..self.len {
+                        arena.move_item(self.base + i, new.base + i);
+                    }
+                }
+                self.base = new.base;
+                self.cap = new.cap;
+            }
+        }
+    }
+
+    /// Push an item, growing the capacity if needed.
+    pub fn push(&mut self, t: T, arena: &mut BumpArena<T>) {
+        if self.cap > self.len {
+            unsafe {
+                arena.write_into_index(self.base + self.len, t);
+            }
+            self.len += 1;
+        } else if (self.base + self.cap) as usize == arena.vec.len() {
+            arena.vec.push(MaybeUninit::new(t));
+            self.cap += 1;
+            self.len += 1;
+        } else {
+            let new_cap = round_up_power_of_two(self.cap + 1);
+            let extra = new_cap - self.cap;
+            self.reserve(extra as usize, arena);
+            unsafe {
+                arena.write_into_index(self.base + self.len, t);
+            }
+            self.len += 1;
+        }
+    }
+
+    /// Clone, if `T` is cloneable.
+    pub fn clone(&self, arena: &mut BumpArena<T>) -> BumpVec<T>
+    where
+        T: Clone,
+    {
+        let mut new = arena.vec_with_capacity(self.len as usize);
+        for i in 0..self.len {
+            let item = self.as_slice(arena)[i as usize].clone();
+            new.push(item, arena);
+        }
+        new
+    }
+
+    /// Truncate the length to a smaller-or-equal length.
+    pub fn truncate(&mut self, len: usize) {
+        let len = len as u32;
+        assert!(len <= self.len);
+        self.len = len;
+    }
+
+    /// Consume the BumpVec and return its indices to a free pool in
+    /// the arena.
+    pub fn free(self, arena: &mut BumpArena<T>) {
+        arena.freelist.push(self.base..(self.base + self.cap));
+    }
+
+    /// Freeze the capacity of this BumpVec, turning it into a slice,
+    /// for a smaller struct (8 bytes rather than 12). Once this
+    /// exists, it is copyable, because the slice will never be freed.
+    pub fn freeze(self, arena: &mut BumpArena<T>) -> BumpSlice<T> {
+        if self.cap > self.len {
+            arena
+                .freelist
+                .push((self.base + self.len)..(self.base + self.cap));
+        }
+        BumpSlice {
+            base: self.base,
+            len: self.len,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T> BumpSlice<T> {
+    /// Returns a slice view of the `BumpSlice`, given a borrow of the
+    /// arena.
+    pub fn as_slice<'a>(&'a self, arena: &'a BumpArena<T>) -> &'a [T] {
+        let maybe_uninit_slice =
+            &arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
+        // Safety: the index range we represent must be initialized.
+        unsafe { std::mem::transmute(maybe_uninit_slice) }
+    }
+
+    /// Returns a mutable slice view of the `BumpSlice`, given a
+    /// mutable borrow of the arena.
+    pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena<T>) -> &'a mut [T] {
+        let maybe_uninit_slice =
+            &mut arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
+        // Safety: the index range we represent must be initialized.
+        unsafe { std::mem::transmute(maybe_uninit_slice) }
+    }
+
+    /// Returns the length of the `BumpSlice`.
+    pub fn len(&self) -> usize {
+        self.len as usize
+    }
+}
+
+impl<T> std::default::Default for BumpVec<T> {
+    fn default() -> Self {
+        BumpVec {
+            base: 0,
+            len: 0,
+            cap: 0,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T> std::default::Default for BumpSlice<T> {
+    fn default() -> Self {
+        BumpSlice {
+            base: 0,
+            len: 0,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_round_up() {
+        assert_eq!(1, round_up_power_of_two(1));
+        assert_eq!(2, round_up_power_of_two(2));
+        assert_eq!(4, round_up_power_of_two(3));
+        assert_eq!(4, round_up_power_of_two(4));
+        assert_eq!(32, round_up_power_of_two(24));
+        assert_eq!(0x8000_0000, round_up_power_of_two(0x7fff_ffff));
+    }
+
+    #[test]
+    fn test_basic() {
+        let mut arena: BumpArena<u32> = BumpArena::new();
+
+        let a = arena.single(1);
+        let b = arena.single(2);
+        let c = arena.single(3);
+        let ab = arena.append(a, b);
+        assert_eq!(ab.as_slice(&arena), &[1, 2]);
+        assert_eq!(ab.cap(), 2);
+        let abc = arena.append(ab, c);
+        assert_eq!(abc.len(), 3);
+        assert_eq!(abc.cap(), 4);
+        assert_eq!(abc.as_slice(&arena), &[1, 2, 3]);
+        assert_eq!(arena.size(), 9);
+        let mut d = arena.single(4);
+        // Should have reused the freelist.
+        assert_eq!(arena.size(), 9);
+        assert_eq!(d.len(), 1);
+        assert_eq!(d.cap(), 1);
+        assert_eq!(d.as_slice(&arena), &[4]);
+        d.as_mut_slice(&mut arena)[0] = 5;
+        assert_eq!(d.as_slice(&arena), &[5]);
+        abc.free(&mut arena);
+        let d2 = d.clone(&mut arena);
+        let dd = arena.append(d, d2);
+        // Should have reused the freelist.
+        assert_eq!(arena.size(), 9);
+        assert_eq!(dd.as_slice(&arena), &[5, 5]);
+        let mut e = arena.from_iter([10, 11, 12].into_iter());
+        e.push(13, &mut arena);
+        assert_eq!(arena.size(), 13);
+        e.reserve(4, &mut arena);
+        assert_eq!(arena.size(), 17);
+        let _f = arena.from_iter([1, 2, 3, 4, 5, 6, 7, 8].into_iter());
+        assert_eq!(arena.size(), 25);
+        e.reserve(8, &mut arena);
+        assert_eq!(e.cap(), 16);
+        assert_eq!(e.as_slice(&arena), &[10, 11, 12, 13]);
+        // `e` must have been copied now that `f` is at the end of the
+        // arena.
+        assert_eq!(arena.size(), 41);
+    }
+}
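Taken together, the arena API is meant to be driven much like `test_basic` above. As a quick orientation, here is a hypothetical consumer sketch (the element type and function are ours; the calls mirror the unit test, and the types are re-exported from the crate root, as `lib.rs` below shows):

```rust
use cranelift_egraph::{BumpArena, BumpVec};

fn main() {
    let mut arena: BumpArena<u32> = BumpArena::new();

    // Build two vectors; each handle is just (base, len, cap), 12 bytes.
    let a: BumpVec<u32> = arena.from_iter([1, 2].into_iter());
    let b = arena.single(3);

    // Appending consumes both handles; `a`'s spare capacity (or the
    // freelist) is reused when possible.
    let mut ab = arena.append(a, b);
    assert_eq!(ab.as_slice(&arena), &[1, 2, 3]);

    ab.push(4, &mut arena);

    // Freeze into a `BumpSlice` (8 bytes); the unused capacity goes
    // back on the freelist.
    let slice = ab.freeze(&mut arena);
    assert_eq!(slice.as_slice(&arena), &[1, 2, 3, 4]);
}
```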
diff --git a/cranelift/egraph/src/ctxhash.rs b/cranelift/egraph/src/ctxhash.rs
new file mode 100644
index 0000000000..467a3b62a2
--- /dev/null
+++ b/cranelift/egraph/src/ctxhash.rs
@@ -0,0 +1,280 @@
+//! A hashmap with "external hashing": nodes are hashed or compared for
+//! equality only with some external context provided on lookup/insert.
+//! This allows very memory-efficient data structures where
+//! node-internal data references some other storage (e.g., offsets into
+//! an array or pool of shared data).
+
+use super::unionfind::UnionFind;
+use hashbrown::raw::{Bucket, RawTable};
+use std::hash::{Hash, Hasher};
+use std::marker::PhantomData;
+
+/// Trait that allows for equality comparison given some external
+/// context.
+///
+/// Note that this trait is implemented by the *context*, rather than
+/// the item type, for somewhat complex lifetime reasons (lack of GATs
+/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
+/// the value type).
+///
+/// Furthermore, the `ctx_eq` method takes a `UnionFind` parameter,
+/// because in practice we require access to it, and a borrow of it
+/// cannot be included in the context type without GATs (similarly to
+/// above).
+pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
+    /// Determine whether `a` and `b` are equal, given the context in
+    /// `self` and the union-find data structure `uf`.
+    fn ctx_eq(&self, a: &V1, b: &V2, uf: &mut UnionFind) -> bool;
+}
+
+/// Trait that allows for hashing given some external context.
+pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
+    /// Compute the hash of `value`, given the context in `self` and
+    /// the union-find data structure `uf`.
+    fn ctx_hash(&self, value: &Value, uf: &mut UnionFind) -> u64;
+}
+
+/// A null-comparator context type for underlying value types that
+/// already have `Eq` and `Hash`.
+#[derive(Default)]
+pub struct NullCtx;
+
+impl<V: Eq> CtxEq<V, V> for NullCtx {
+    fn ctx_eq(&self, a: &V, b: &V, _: &mut UnionFind) -> bool {
+        a.eq(b)
+    }
+}
+impl<V: Eq + Hash> CtxHash<V> for NullCtx {
+    fn ctx_hash(&self, value: &V, _: &mut UnionFind) -> u64 {
+        let mut state = fxhash::FxHasher::default();
+        value.hash(&mut state);
+        state.finish()
+    }
+}
+
+/// A bucket in the hash table.
+///
+/// Some performance-related design notes: we cache the hashcode for
+/// speed, as this often buys a few percent speed in
+/// interning-table-heavy workloads. We only keep the low 32 bits of
+/// the hashcode, for memory efficiency: in common use, `K` and `V`
+/// are often 32 bits also, and a 12-byte bucket is measurably better
+/// than a 16-byte bucket.
+struct BucketData<K, V> {
+    hash: u32,
+    k: K,
+    v: V,
+}
+
+/// A HashMap that takes external context for all operations.
+pub struct CtxHashMap<K, V> {
+    raw: RawTable<BucketData<K, V>>,
+}
+
+impl<K, V> CtxHashMap<K, V> {
+    /// Create an empty hashmap.
+    pub fn new() -> Self {
+        Self {
+            raw: RawTable::new(),
+        }
+    }
+
+    /// Create an empty hashmap with pre-allocated space for the given
+    /// capacity.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            raw: RawTable::with_capacity(capacity),
+        }
+    }
+}
+
+impl<K, V> CtxHashMap<K, V> {
+    /// Insert a new key-value pair, returning the old value associated
+    /// with this key (if any).
+    pub fn insert<Ctx: CtxEq<K, K> + CtxHash<K>>(
+        &mut self,
+        k: K,
+        v: V,
+        ctx: &Ctx,
+        uf: &mut UnionFind,
+    ) -> Option<V> {
+        let hash = ctx.ctx_hash(&k, uf) as u32;
+        match self.raw.find(hash as u64, |bucket| {
+            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
+        }) {
+            Some(bucket) => {
+                let data = unsafe { bucket.as_mut() };
+                Some(std::mem::replace(&mut data.v, v))
+            }
+            None => {
+                let data = BucketData { hash, k, v };
+                self.raw
+                    .insert_entry(hash as u64, data, |bucket| bucket.hash as u64);
+                None
+            }
+        }
+    }
+
+    /// Look up a key, returning a borrow of the value if present.
+    pub fn get<'a, Q, Ctx: CtxEq<K, Q> + CtxHash<K> + CtxHash<Q>>(
+        &'a self,
+        k: &Q,
+        ctx: &Ctx,
+        uf: &mut UnionFind,
+    ) -> Option<&'a V> {
+        let hash = ctx.ctx_hash(k, uf) as u32;
+        self.raw
+            .find(hash as u64, |bucket| {
+                hash == bucket.hash && ctx.ctx_eq(&bucket.k, k, uf)
+            })
+            .map(|bucket| {
+                let data = unsafe { bucket.as_ref() };
+                &data.v
+            })
+    }
+
+    /// Return an Entry cursor on a given bucket for a key, allowing
+    /// for fetching the current value or inserting a new one.
+    pub fn entry<'a, Ctx: CtxEq<K, K> + CtxHash<K>>(
+        &'a mut self,
+        k: K,
+        ctx: &'a Ctx,
+        uf: &mut UnionFind,
+    ) -> Entry<'a, K, V> {
+        let hash = ctx.ctx_hash(&k, uf) as u32;
+        match self.raw.find(hash as u64, |bucket| {
+            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
+        }) {
+            Some(bucket) => Entry::Occupied(OccupiedEntry {
+                bucket,
+                _phantom: PhantomData,
+            }),
+            None => Entry::Vacant(VacantEntry {
+                raw: &mut self.raw,
+                hash,
+                key: k,
+            }),
+        }
+    }
+}
+
+/// An entry in the hashmap.
+pub enum Entry<'a, K: 'a, V> {
+    Occupied(OccupiedEntry<'a, K, V>),
+    Vacant(VacantEntry<'a, K, V>),
+}
+
+/// An occupied entry.
+pub struct OccupiedEntry<'a, K, V> {
+    bucket: Bucket<BucketData<K, V>>,
+    _phantom: PhantomData<&'a ()>,
+}
+
+impl<'a, K: 'a, V> OccupiedEntry<'a, K, V> {
+    /// Get the value.
+    pub fn get(&self) -> &'a V {
+        let bucket = unsafe { self.bucket.as_ref() };
+        &bucket.v
+    }
+}
+
+/// A vacant entry.
+pub struct VacantEntry<'a, K, V> {
+    raw: &'a mut RawTable<BucketData<K, V>>,
+    hash: u32,
+    key: K,
+}
+
+impl<'a, K, V> VacantEntry<'a, K, V> {
+    /// Insert a value.
+    pub fn insert(self, v: V) -> &'a V {
+        let bucket = self.raw.insert(
+            self.hash as u64,
+            BucketData {
+                hash: self.hash,
+                k: self.key,
+                v,
+            },
+            |bucket| bucket.hash as u64,
+        );
+        let data = unsafe { bucket.as_ref() };
+        &data.v
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::hash::Hash;
+
+    #[derive(Clone, Copy, Debug)]
+    struct Key {
+        index: u32,
+    }
+    struct Ctx {
+        vals: &'static [&'static str],
+    }
+    impl CtxEq<Key, Key> for Ctx {
+        fn ctx_eq(&self, a: &Key, b: &Key, _: &mut UnionFind) -> bool {
+            self.vals[a.index as usize].eq(self.vals[b.index as usize])
+        }
+    }
+    impl CtxHash<Key> for Ctx {
+        fn ctx_hash(&self, value: &Key, _: &mut UnionFind) -> u64 {
+            let mut state = fxhash::FxHasher::default();
+            self.vals[value.index as usize].hash(&mut state);
+            state.finish()
+        }
+    }
+
+    #[test]
+    fn test_basic() {
+        let ctx = Ctx {
+            vals: &["a", "b", "a"],
+        };
+        let mut uf = UnionFind::new();
+
+        let k0 = Key { index: 0 };
+        let k1 = Key { index: 1 };
+        let k2 = Key { index: 2 };
+
+        assert!(ctx.ctx_eq(&k0, &k2, &mut uf));
+        assert!(!ctx.ctx_eq(&k0, &k1, &mut uf));
+        assert!(!ctx.ctx_eq(&k2, &k1, &mut uf));
+
+        let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
+        assert_eq!(map.insert(k0, 42, &ctx, &mut uf), None);
+        assert_eq!(map.insert(k2, 84, &ctx, &mut uf), Some(42));
+        assert_eq!(map.get(&k1, &ctx, &mut uf), None);
+        assert_eq!(*map.get(&k0, &ctx, &mut uf).unwrap(), 84);
+    }
+
+    #[test]
+    fn test_entry() {
+        let mut ctx = Ctx {
+            vals: &["a", "b", "a"],
+        };
+        let mut uf = UnionFind::new();
+
+        let k0 = Key { index: 0 };
+        let k1 = Key { index: 1 };
+        let k2 = Key { index: 2 };
+
+        let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
+        match map.entry(k0, &mut ctx, &mut uf) {
+            Entry::Vacant(v) => {
+                v.insert(1);
+            }
+            _ => panic!(),
+        }
+        match map.entry(k1, &mut ctx, &mut uf) {
+            Entry::Vacant(_) => {}
+            Entry::Occupied(_) => panic!(),
+        }
+        match map.entry(k2, &mut ctx, &mut uf) {
+            Entry::Occupied(o) => {
+                assert_eq!(*o.get(), 1);
+            }
+            _ => panic!(),
+        }
+    }
+}
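The pattern `CtxHashMap` enables is worth spelling out: keys stay tiny because everything they refer to lives in the context, not in the buckets. A hypothetical consumer sketch (the `Sym`/`Pool` names are invented for illustration; `fxhash` is used just as the crate's own `NullCtx` does):

```rust
use cranelift_egraph::{CtxEq, CtxHash, CtxHashMap, UnionFind};
use std::hash::{Hash, Hasher};

// A 4-byte key: an index into a shared string pool.
#[derive(Clone, Copy)]
struct Sym(u32);

struct Pool {
    strings: Vec<String>,
}

impl CtxEq<Sym, Sym> for Pool {
    fn ctx_eq(&self, a: &Sym, b: &Sym, _: &mut UnionFind) -> bool {
        self.strings[a.0 as usize] == self.strings[b.0 as usize]
    }
}

impl CtxHash<Sym> for Pool {
    fn ctx_hash(&self, value: &Sym, _: &mut UnionFind) -> u64 {
        let mut state = fxhash::FxHasher::default();
        self.strings[value.0 as usize].hash(&mut state);
        state.finish()
    }
}

fn main() {
    let pool = Pool {
        strings: vec!["x".into(), "y".into(), "x".into()],
    };
    let mut uf = UnionFind::new();
    // Each bucket is hash (u32) + Sym (u32) + u32 value = 12 bytes.
    let mut map: CtxHashMap<Sym, u32> = CtxHashMap::new();

    assert_eq!(map.insert(Sym(0), 10, &pool, &mut uf), None);
    // Sym(2) spells the same string as Sym(0), so it hits the same bucket.
    assert_eq!(map.insert(Sym(2), 20, &pool, &mut uf), Some(10));
    assert!(map.get(&Sym(1), &pool, &mut uf).is_none());
}
```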
diff --git a/cranelift/egraph/src/lib.rs b/cranelift/egraph/src/lib.rs
new file mode 100644
index 0000000000..81006be2b8
--- /dev/null
+++ b/cranelift/egraph/src/lib.rs
@@ -0,0 +1,613 @@
+//! # ægraph (aegraph, or acyclic e-graph) implementation.
+//!
+//! An aegraph is a form of e-graph. We will first describe the
+//! e-graph, then the aegraph as a slightly less powerful but highly
+//! optimized variant of it.
+//!
+//! The main goal of this library is to be explicitly memory-efficient
+//! and light on allocations. We need to be as fast and as small as
+//! possible in order to minimize impact on compile time in a
+//! production compiler.
+//!
+//! ## The e-graph
+//!
+//! An e-graph, or equivalence graph, is a kind of node-based
+//! intermediate representation (IR) data structure that consists of
+//! *eclasses* and *enodes*. An eclass contains one or more enodes;
+//! semantically an eclass is like a value, and an enode is one way to
+//! compute that value. If several enodes are in one eclass, the data
+//! structure is asserting that any of these enodes, if evaluated,
+//! would produce the value.
+//!
+//! An e-graph also contains a deduplicating hash-map of nodes, so if
+//! the user creates the same e-node more than once, they get the same
+//! e-class ID.
+//!
+//! In the usual use-case, an e-graph is used to build a sea-of-nodes
+//! IR for a function body or other expression-based code, and then
+//! *rewrite rules* are applied to the e-graph. Each rewrite
+//! potentially introduces a new e-node that is equivalent to an
+//! existing e-node, and then unions the two e-nodes' classes
+//! together.
+//!
+//! In the trivial case this results in an e-class containing a series
+//! of e-nodes that are newly added -- all known forms of an
+//! expression -- but note that if a rewrite rule rewrites into an
+//! existing e-node (discovered via deduplication), rewriting can
+//! result in unioning two e-classes that have existed for some
+//! time.
+//!
+//! An e-graph's enodes refer to *classes* for their arguments, rather
+//! than other nodes directly. This is key to the ability of an
+//! e-graph to canonicalize: when two e-classes that are already used
+//! as arguments by other e-nodes are unioned, all e-nodes that refer
+//! to those e-classes are themselves re-canonicalized. This can
+//! result in "cascading" unioning of eclasses, in a process that
+//! discovers the transitive implications of all individual
+//! equalities. This process is known as "equality saturation".
+//!
+//! ## The acyclic e-graph (aegraph)
+//!
+//! An e-graph is powerful, but it can also be expensive to build and
+//! saturate: there are often many different forms an expression can
+//! take (because many different rewrites are possible), and cascading
+//! canonicalization requires heavyweight data structure bookkeeping
+//! that is expensive to maintain.
+//!
+//! This crate introduces the aegraph: an acyclic e-graph. This data
+//! structure stores an e-class as an *immutable persistent data
+//! structure*. An id can refer to some *level* of an eclass: a
+//! snapshot of the nodes in the eclass at one point in time. The
+//! nodes referred to by this id never change, though the eclass may
+//! grow later.
+//!
+//! A *union* is also an operation that creates a new eclass id: the
+//! original eclass IDs refer to the original eclass contents, while
+//! the id resulting from the `union()` operation refers to an eclass
+//! that has all nodes.
+//!
+//! In order to allow for adequate canonicalization, an enode normally
+//! stores the *latest* eclass id for each argument, but computes
+//! hashes and equality using a *canonical* eclass id. We define such
+//! a canonical id with a union-find data structure, just as for a
+//! traditional e-graph. It is normally the lowest id referring to
+//! part of the eclass.
+//!
+//! The persistent/immutable nature of this data structure yields one
+//! extremely important property: it is acyclic! This simplifies
+//! operation greatly:
+//!
+//! - When "elaborating" out of the e-graph back to linearized code,
+//!   so that we can generate machine code, we do not need to break
+//!   cycles. A given enode cannot indirectly refer back to itself.
+//!
+//! - When applying rewrite rules, the nodes visible from a given id
+//!   for an eclass never change. This means that we only need to
+//!   apply rewrite rules at that node id *once*.
+//!
+//! ## Data Structure and Example
+//!
+//! Each eclass id refers to a table entry that can be one of:
+//!
+//! - A single enode;
+//! - An enode and an earlier eclass id it is appended to;
+//! - A "union node" with two earlier eclass ids.
+//!
+//! Building the aegraph consists solely of adding new entries to the
+//! end of this table. An enode in any given entry can only refer to
+//! earlier eclass ids.
+//!
+//! For example, consider the following eclass table:
+//!
+//! ```plain
+//!
+//!    eclass/enode table
+//!
+//!     eclass1    iconst(1)
+//!     eclass2    blockparam(block0, 0)
+//!     eclass3    iadd(eclass1, eclass2)
+//! ```
+//!
+//! This represents the expression `iadd(blockparam(block0, 0),
+//! iconst(1))` (as the sole enode for eclass3).
+//!
+//! Now, say that as we further build the function body, we add
+//! another enode `iadd(eclass3, iconst(1))`. The `iconst(1)` will be
+//! deduplicated to `eclass1`, and the toplevel `iadd` will become its
+//! own new eclass (`eclass4`).
+//!
+//! ```plain
+//!     eclass4    iadd(eclass3, eclass1)
+//! ```
+//!
+//! Now we apply our body of rewrite rules, and these rules can
+//! combine `x + 1 + 1` into `x + 2`; so we get:
+//!
+//! ```plain
+//!     eclass5    iconst(2)
+//!     eclass6    union(iadd(eclass2, eclass5), eclass4)
+//! ```
+//!
+//! Note that we added the nodes for the new expression, and then we
+//! union'd it with the earlier `eclass4`. Logically this represents a
+//! single eclass that contains two nodes -- the `x + 1 + 1` and `x +
+//! 2` representations -- and the *latest* id for the eclass,
+//! `eclass6`, can reach all nodes in the eclass (here the node stored
+//! in `eclass6` and the earlier one in `eclass4`).
+//!
+//! ## aegraph vs. egraph
+//!
+//! Where does an aegraph fall short of an e-graph -- or in other
+//! words, why maintain the data structures to allow for full
+//! (re)canonicalization at all, with e.g. parent pointers to
+//! recursively update parents?
+//!
+//! This question deserves further study, but right now, it appears
+//! that the difference is limited to a case like the following:
+//!
+//! - expression E1 is interned into the aegraph.
+//! - expression E2 is interned into the aegraph. It uses E1 as an
+//!   argument to one or more operators, and so refers to the
+//!   (currently) latest id for E1.
+//! - expression E3 is interned into the aegraph. A rewrite rule fires
+//!   that unions E3 with E1.
+//!
+//! In an e-graph, the last action would trigger a re-canonicalization
+//! of all "parents" (users) of E1; so E2 would be re-canonicalized
+//! using an id that represents the union of E1 and E3. At
+//! code-generation time, E2 could choose to use a value computed by
+//! either E1's or E3's operator. In an aegraph, this is not the case:
+//! E2's e-class and e-nodes are immutable once created, so E2 refers
+//! only to E1's representation of the value (a "slice" of the whole
+//! e-class).
+//!
+//! While at first this sounds quite limiting, there actually appears
+//! to be a nice mutually-beneficial interaction with the immediate
+//! application of rewrite rules: by applying all rewrites we know
+//! about right when E1 is interned, E2 can refer to the best version
+//! when it is created. The above scenario only leads to a missed
+//! optimization if:
+//!
+//! - a rewrite rule exists from E3 to E1, but not E1 to E3; and
+//! - E3 is *cheaper* than E1.
+//!
+//! Or in other words, this only matters if there is a rewrite rule
+//! that rewrites into a more expensive direction. This is unlikely
+//! for the sorts of rewrite rules we plan to write; it may matter
+//! more if many possible equalities are expressed, such as
+//! associativity, commutativity, etc.
+//!
+//! Note that the above represents the best of our understanding, but
+//! there may be cases we have missed; a more complete examination of
+//! this question would involve building a full equality saturation
+//! loop on top of the (a)egraph in this crate, and testing with many
+//! benchmarks to see if it makes any difference.
+//!
+//! ## Rewrite Rules (FLAX: Fast Localized Aegraph eXpansion)
+//!
+//! The most common use of an e-graph or aegraph is to serve as the IR
+//! for a compiler. In this use-case, we usually wish to transform the
+//! program using a body of rewrite rules that represent valid
+//! transformations (equivalent and hopefully simpler ways of
+//! computing results). An aegraph supports applying rules in a fairly
+//! straightforward way: whenever a new eclass entry is added to the
+//! table, we invoke a toplevel "apply all rewrite rules" entry
+//! point. This entry point creates new nodes as needed, and when
+//! done, unions the rewritten nodes with the original. We thus
+//! *immediately* expand a new value into all of its representations.
+//!
+//! This immediate expansion stands in contrast to a traditional
+//! "equality saturation" e-graph system, in which it is usually best
+//! to apply rules in batches and then fix up the
+//! canonicalization. That batched approach was introduced in the
+//! `egg` e-graph engine [^1]. We call our system FLAX (because flax
+//! is an alternative to egg): Fast Localized Aegraph eXpansion.
+//!
+//! The reason that this is possible in an aegraph but not
+//! (efficiently, at least) in a traditional e-graph is that the data
+//! structure nodes are immutable once created: an eclass id will
+//! always refer to a fixed set of enodes. There is no
+//! recanonicalizing of eclass arguments as they union; but also this
+//! is not usually necessary, because args will have already been
+//! processed and eagerly rewritten as well. In other words, eager
+//! rewriting and the immutable data structure mutually allow each
+//! other to be practical; both work together.
+//!
+//! [^1]: M Willsey, C Nandi, Y R Wang, O Flatt, Z Tatlock, P
+//!       Panchekha. "egg: Fast and Flexible Equality Saturation." In
+//!       POPL 2021.
+
+use cranelift_entity::PrimaryMap;
+use cranelift_entity::{entity_impl, packed_option::ReservedValue};
+use smallvec::{smallvec, SmallVec};
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::marker::PhantomData;
+
+mod bumpvec;
+mod ctxhash;
+mod unionfind;
+
+pub use bumpvec::{BumpArena, BumpSlice, BumpVec};
+pub use ctxhash::{CtxEq, CtxHash, CtxHashMap, Entry};
+pub use unionfind::UnionFind;
+
+/// An eclass ID.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Id(u32);
+entity_impl!(Id, "eclass");
+
+impl Id {
+    pub fn invalid() -> Id {
+        Self::reserved_value()
+    }
+}
+impl std::default::Default for Id {
+    fn default() -> Self {
+        Self::invalid()
+    }
+}
+
+/// A trait implemented by all "languages" (types that can be enodes).
+pub trait Language: CtxEq<Self::Node, Self::Node> + CtxHash<Self::Node> {
+    type Node: Debug;
+    fn children<'a>(&'a self, node: &'a Self::Node) -> &'a [Id];
+    fn children_mut<'a>(&'a mut self, node: &'a mut Self::Node) -> &'a mut [Id];
+    fn needs_dedup(&self, node: &Self::Node) -> bool;
+}
+
+/// Conditionally-compiled trace-log macro. (Borrowed from
+/// `cranelift-codegen`; it's not worth factoring out a common
+/// subcrate for this.)
+#[macro_export]
+macro_rules! trace {
+    ($($tt:tt)*) => {
+        if cfg!(feature = "trace-log") {
+            ::log::trace!($($tt)*);
+        }
+    };
+}
+
+/// An egraph.
+pub struct EGraph<L: Language> {
+    /// Node-allocation arena.
+    pub nodes: Vec<L::Node>,
+    /// Hash-consing map from Nodes to eclass IDs.
+    node_map: CtxHashMap<NodeKey, Id>,
+    /// Eclass definitions. Each eclass consists of an enode and a
+    /// parent pointer to the rest of the eclass.
+    pub classes: PrimaryMap<Id, EClass>,
+    /// Union-find for canonical ID generation. This lets us name an
+    /// eclass with a canonical ID that is the same for all
+    /// generations of the class.
+    pub unionfind: UnionFind,
+}
+
+/// A reference to a node.
+#[derive(Clone, Copy, Debug)]
+pub struct NodeKey {
+    index: u32,
+}
+
+impl NodeKey {
+    fn from_node_idx(node_idx: usize) -> NodeKey {
+        NodeKey {
+            index: u32::try_from(node_idx).unwrap(),
+        }
+    }
+
+    /// Get the node for this NodeKey, given the `nodes` from the
+    /// appropriate `EGraph`.
+    pub fn node<'a, L: Language>(&self, nodes: &'a [L::Node]) -> &'a L::Node {
+        &nodes[self.index as usize]
+    }
+
+    fn bits(self) -> u32 {
+        self.index
+    }
+
+    fn from_bits(bits: u32) -> Self {
+        NodeKey { index: bits }
+    }
+}
+
+struct NodeKeyCtx<'a, L: Language> {
+    nodes: &'a [L::Node],
+    node_ctx: &'a L,
+}
+
+impl<'ctx, L: Language> CtxEq<NodeKey, NodeKey> for NodeKeyCtx<'ctx, L> {
+    fn ctx_eq(&self, a: &NodeKey, b: &NodeKey, uf: &mut UnionFind) -> bool {
+        let a = a.node::<L>(self.nodes);
+        let b = b.node::<L>(self.nodes);
+        self.node_ctx.ctx_eq(a, b, uf)
+    }
+}
+
+impl<'ctx, L: Language> CtxHash<NodeKey> for NodeKeyCtx<'ctx, L> {
+    fn ctx_hash(&self, value: &NodeKey, uf: &mut UnionFind) -> u64 {
+        self.node_ctx.ctx_hash(value.node::<L>(self.nodes), uf)
+    }
+}
+
+/// An EClass entry. Contains either a single new enode and a parent
+/// eclass (i.e., adds one new enode), or unions two parent eclasses
+/// together.
+#[derive(Debug, Clone, Copy)]
+pub struct EClass {
+    // formats:
+    //
+    // 00 | unused  (31 bits)         | NodeKey (31 bits)
+    // 01 | eclass_parent (31 bits)   | NodeKey (31 bits)
+    // 10 | eclass_parent_1 (31 bits) | eclass_parent_2 (31 bits)
+    bits: u64,
+}
+
+impl EClass {
+    fn node(node: NodeKey) -> EClass {
+        let node_idx = node.bits() as u64;
+        debug_assert!(node_idx < (1 << 31));
+        EClass {
+            bits: (0b00 << 62) | node_idx,
+        }
+    }
+
+    fn node_and_parent(node: NodeKey, eclass_parent: Id) -> EClass {
+        let node_idx = node.bits() as u64;
+        debug_assert!(node_idx < (1 << 31));
+        debug_assert!(eclass_parent != Id::invalid());
+        let parent = eclass_parent.0 as u64;
+        debug_assert!(parent < (1 << 31));
+        EClass {
+            bits: (0b01 << 62) | (parent << 31) | node_idx,
+        }
+    }
+
+    fn union(parent1: Id, parent2: Id) -> EClass {
+        debug_assert!(parent1 != Id::invalid());
+        let parent1 = parent1.0 as u64;
+        debug_assert!(parent1 < (1 << 31));
+
+        debug_assert!(parent2 != Id::invalid());
+        let parent2 = parent2.0 as u64;
+        debug_assert!(parent2 < (1 << 31));
+
+        EClass {
+            bits: (0b10 << 62) | (parent1 << 31) | parent2,
+        }
+    }
+
+    /// Get the node, if any, from a node-only or node-and-parent
+    /// eclass.
+    pub fn get_node(&self) -> Option<NodeKey> {
+        self.as_node()
+            .or_else(|| self.as_node_and_parent().map(|(node, _)| node))
+    }
+
+    /// Get the first parent, if any.
+    pub fn parent1(&self) -> Option<Id> {
+        self.as_node_and_parent()
+            .map(|(_, p1)| p1)
+            .or(self.as_union().map(|(p1, _)| p1))
+    }
+
+    /// Get the second parent, if any.
+    pub fn parent2(&self) -> Option<Id> {
+        self.as_union().map(|(_, p2)| p2)
+    }
+
+    /// If this EClass is just a lone enode, return it.
+    pub fn as_node(&self) -> Option<NodeKey> {
+        if (self.bits >> 62) == 0b00 {
+            let node_idx = (self.bits & ((1 << 31) - 1)) as u32;
+            Some(NodeKey::from_bits(node_idx))
+        } else {
+            None
+        }
+    }
+
+    /// If this EClass is one new enode and a parent, return the node
+    /// and parent ID.
+    pub fn as_node_and_parent(&self) -> Option<(NodeKey, Id)> {
+        if (self.bits >> 62) == 0b01 {
+            let node_idx = (self.bits & ((1 << 31) - 1)) as u32;
+            let parent = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
+            Some((NodeKey::from_bits(node_idx), Id::from_bits(parent)))
+        } else {
+            None
+        }
+    }
+
+    /// If this EClass is the union variety, return the two parent
+    /// EClasses. Both are guaranteed not to be `Id::invalid()`.
+    pub fn as_union(&self) -> Option<(Id, Id)> {
+        if (self.bits >> 62) == 0b10 {
+            let parent1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
+            let parent2 = (self.bits & ((1 << 31) - 1)) as u32;
+            Some((Id::from_bits(parent1), Id::from_bits(parent2)))
+        } else {
+            None
+        }
+    }
+}
+
+/// A new or existing `T` when adding to a deduplicated set or data
+/// structure, like an egraph.
+#[derive(Clone, Copy, Debug)]
+pub enum NewOrExisting<T> {
+    New(T),
+    Existing(T),
+}
+
+impl<T> NewOrExisting<T> {
+    /// Get the underlying value.
+    pub fn get(self) -> T {
+        match self {
+            NewOrExisting::New(t) => t,
+            NewOrExisting::Existing(t) => t,
+        }
+    }
+}
+
+impl<L: Language> EGraph<L>
+where
+    L::Node: 'static,
+{
+    /// Create a new aegraph.
+    pub fn new() -> Self {
+        Self {
+            nodes: vec![],
+            node_map: CtxHashMap::new(),
+            classes: PrimaryMap::new(),
+            unionfind: UnionFind::new(),
+        }
+    }
+
+    /// Create a new aegraph with the given capacity.
+    pub fn with_capacity(nodes: usize) -> Self {
+        Self {
+            nodes: Vec::with_capacity(nodes),
+            node_map: CtxHashMap::with_capacity(nodes),
+            classes: PrimaryMap::with_capacity(nodes),
+            unionfind: UnionFind::with_capacity(nodes),
+        }
+    }
+
+    /// Add a new node.
+    pub fn add(&mut self, node: L::Node, node_ctx: &L) -> NewOrExisting<Id> {
+        // Push the node. We can then build a NodeKey that refers to
+        // it and look for an existing interned copy. If one exists,
+        // we can pop the pushed node and return the existing Id.
+        let node_idx = self.nodes.len();
+        trace!("adding node: {:?}", node);
+        let needs_dedup = node_ctx.needs_dedup(&node);
+        self.nodes.push(node);
+
+        let key = NodeKey::from_node_idx(node_idx);
+        if needs_dedup {
+            let ctx = NodeKeyCtx {
+                nodes: &self.nodes[..],
+                node_ctx,
+            };
+
+            match self.node_map.entry(key, &ctx, &mut self.unionfind) {
+                Entry::Occupied(o) => {
+                    let eclass_id = *o.get();
+                    self.nodes.pop();
+                    trace!(" -> existing id {}", eclass_id);
+                    NewOrExisting::Existing(eclass_id)
+                }
+                Entry::Vacant(v) => {
+                    // We're creating a new eclass now.
+                    let eclass_id = self.classes.push(EClass::node(key));
+                    trace!(" -> new node and eclass: {}", eclass_id);
+                    self.unionfind.add(eclass_id);
+
+                    // Add to interning map with a NodeKey referring to the eclass.
+                    v.insert(eclass_id);
+
+                    NewOrExisting::New(eclass_id)
+                }
+            }
+        } else {
+            let eclass_id = self.classes.push(EClass::node(key));
+            self.unionfind.add(eclass_id);
+            NewOrExisting::New(eclass_id)
+        }
+    }
+
+    /// Merge one eclass into another, maintaining the acyclic
+    /// property (args must have lower eclass Ids than the eclass
+    /// containing the node with those args). Returns the Id of the
+    /// merged eclass.
+    pub fn union(&mut self, a: Id, b: Id) -> Id {
+        assert_ne!(a, Id::invalid());
+        assert_ne!(b, Id::invalid());
+        let (a, b) = (std::cmp::max(a, b), std::cmp::min(a, b));
+        trace!("union: id {} and id {}", a, b);
+        if a == b {
+            trace!(" -> no-op");
+            return a;
+        }
+
+        self.unionfind.union(a, b);
+
+        // If the younger eclass has no parent, we can link it
+        // directly and return that eclass. Otherwise, we create a new
+        // union eclass.
+        if let Some(node) = self.classes[a].as_node() {
+            trace!(
+                " -> id {} is one-node eclass; making into node-and-parent with id {}",
+                a,
+                b
+            );
+            self.classes[a] = EClass::node_and_parent(node, b);
+            return a;
+        }
+
+        let u = self.classes.push(EClass::union(a, b));
+        self.unionfind.add(u);
+        self.unionfind.union(u, b);
+        trace!(" -> union id {} and id {} into id {}", a, b, u);
+        u
+    }
+
+    /// Get the canonical ID for an eclass. This may be an older
+    /// generation, so will not be able to see all enodes in the
+    /// eclass; but it will allow us to unambiguously refer to an
+    /// eclass, even across merging.
+    pub fn canonical_id_mut(&mut self, eclass: Id) -> Id {
+        self.unionfind.find_and_update(eclass)
+    }
+
+    /// Get the canonical ID for an eclass. This may be an older
+    /// generation, so will not be able to see all enodes in the
+    /// eclass; but it will allow us to unambiguously refer to an
+    /// eclass, even across merging.
+    pub fn canonical_id(&self, eclass: Id) -> Id {
+        self.unionfind.find(eclass)
+    }
+
+    /// Get the enodes for a given eclass.
+    pub fn enodes(&self, eclass: Id) -> NodeIter<L> {
+        NodeIter {
+            stack: smallvec![eclass],
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// An iterator over all nodes in an eclass.
+///
+/// Because eclasses are immutable once created, this does *not* need
+/// to hold an open borrow on the egraph; the caller remains free to
+/// add new nodes, and our existing Ids will remain valid.
+pub struct NodeIter<L: Language> {
+    stack: SmallVec<[Id; 8]>,
+    _phantom: PhantomData<L>,
+}
+
+impl<L: Language> NodeIter<L> {
+    pub fn next<'a>(&mut self, egraph: &'a EGraph<L>) -> Option<&'a L::Node> {
+        while let Some(next) = self.stack.pop() {
+            let eclass = egraph.classes[next];
+            if let Some(node) = eclass.as_node() {
+                return Some(&egraph.nodes[node.index as usize]);
+            } else if let Some((node, parent)) = eclass.as_node_and_parent() {
+                if parent != Id::invalid() {
+                    self.stack.push(parent);
+                }
+                return Some(&egraph.nodes[node.index as usize]);
+            } else if let Some((parent1, parent2)) = eclass.as_union() {
+                debug_assert!(parent1 != Id::invalid());
+                debug_assert!(parent2 != Id::invalid());
+                self.stack.push(parent2);
+                self.stack.push(parent1);
+                continue;
+            } else {
+                unreachable!("Invalid eclass format");
+            }
+        }
+        None
+    }
+}
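To tie the pieces together, here is a hypothetical end-to-end sketch of a client: a toy `Language` with constants and adds, using `fxhash` for hashing just as `NullCtx` does. All names here (`Node`, `ToyLang`) are invented for illustration; the real client, the Cranelift mid-end, supplies a much richer node type and a body of rewrite rules.

```rust
use cranelift_egraph::{CtxEq, CtxHash, EGraph, Id, Language, NewOrExisting, UnionFind};
use std::hash::{Hash, Hasher};

#[derive(Debug, PartialEq, Eq, Hash)]
enum Node {
    Const(i64),
    Add([Id; 2]),
}

struct ToyLang;

impl CtxEq<Node, Node> for ToyLang {
    fn ctx_eq(&self, a: &Node, b: &Node, _: &mut UnionFind) -> bool {
        a == b
    }
}
impl CtxHash<Node> for ToyLang {
    fn ctx_hash(&self, value: &Node, _: &mut UnionFind) -> u64 {
        let mut state = fxhash::FxHasher::default();
        value.hash(&mut state);
        state.finish()
    }
}
impl Language for ToyLang {
    type Node = Node;
    fn children<'a>(&'a self, node: &'a Node) -> &'a [Id] {
        match node {
            Node::Const(_) => &[],
            Node::Add(args) => args,
        }
    }
    fn children_mut<'a>(&'a mut self, node: &'a mut Node) -> &'a mut [Id] {
        match node {
            Node::Const(_) => &mut [],
            Node::Add(args) => args,
        }
    }
    fn needs_dedup(&self, _node: &Node) -> bool {
        true
    }
}

fn main() {
    let ctx = ToyLang;
    let mut eg: EGraph<ToyLang> = EGraph::new();

    // Interning deduplicates: adding the same enode twice yields
    // `Existing` with the first id.
    let one = eg.add(Node::Const(1), &ctx).get();
    let dup = eg.add(Node::Const(1), &ctx);
    assert!(matches!(dup, NewOrExisting::Existing(id) if id == one));

    let two = eg.add(Node::Const(2), &ctx).get();
    let sum = eg.add(Node::Add([one, one]), &ctx).get();

    // Pretend a rewrite rule proved `1 + 1 == 2`.
    let merged = eg.union(sum, two);

    // Walk all enodes reachable from the merged eclass.
    let mut iter = eg.enodes(merged);
    while let Some(node) = iter.next(&eg) {
        println!("{:?}", node);
    }
}
```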
diff --git a/cranelift/egraph/src/unionfind.rs b/cranelift/egraph/src/unionfind.rs
new file mode 100644
index 0000000000..70106e2896
--- /dev/null
+++ b/cranelift/egraph/src/unionfind.rs
@@ -0,0 +1,70 @@
+//! Simple union-find data structure.
+
+use crate::{trace, Id};
+use cranelift_entity::SecondaryMap;
+
+/// A union-find data structure. The data structure can allocate
+/// `Id`s, indicating eclasses, and can merge eclasses together.
+#[derive(Clone, Debug)]
+pub struct UnionFind {
+    parent: SecondaryMap<Id, Id>,
+}
+
+impl UnionFind {
+    /// Create a new `UnionFind`.
+    pub fn new() -> Self {
+        UnionFind {
+            parent: SecondaryMap::new(),
+        }
+    }
+
+    /// Create a new `UnionFind` with the given capacity.
+    pub fn with_capacity(cap: usize) -> Self {
+        UnionFind {
+            parent: SecondaryMap::with_capacity(cap),
+        }
+    }
+
+    /// Add an `Id` to the `UnionFind`, with its own equivalence class
+    /// initially. All `Id`s must be added before being queried or
+    /// unioned.
+    pub fn add(&mut self, id: Id) {
+        self.parent[id] = id;
+    }
+
+    /// Find the canonical `Id` of a given `Id`.
+    pub fn find(&self, mut node: Id) -> Id {
+        while node != self.parent[node] {
+            node = self.parent[node];
+        }
+        node
+    }
+
+    /// Find the canonical `Id` of a given `Id`, updating the data
+    /// structure in the process so that future queries for this `Id`
+    /// (and others in its chain up to the root of the equivalence
+    /// class) will be faster.
+    pub fn find_and_update(&mut self, mut node: Id) -> Id {
+        // "Path splitting" mutating find (Tarjan and Van Leeuwen).
+        let orig = node;
+        while node != self.parent[node] {
+            let next = self.parent[self.parent[node]];
+            self.parent[node] = next;
+            node = next;
+        }
+        trace!("find_and_update: {} -> {}", orig, node);
+        node
+    }
+
+    /// Merge the equivalence classes of the two `Id`s.
+    pub fn union(&mut self, a: Id, b: Id) {
+        let a = self.find_and_update(a);
+        let b = self.find_and_update(b);
+        let (a, b) = (std::cmp::min(a, b), std::cmp::max(a, b));
+        if a != b {
+            // Always canonicalize toward lower IDs.
+            self.parent[b] = a;
+            trace!("union: {}, {}", a, b);
+        }
+    }
+}
diff --git a/scripts/publish.rs b/scripts/publish.rs
index 30e699b3c7..f0a9565171 100644
--- a/scripts/publish.rs
+++ b/scripts/publish.rs
@@ -25,6 +25,7 @@ const CRATES_TO_PUBLISH: &[&str] = &[
     "cranelift-codegen-shared",
     "cranelift-codegen-meta",
     "cranelift-codegen",
+    "cranelift-egraph",
    "cranelift-reader",
     "cranelift-serde",
     "cranelift-module",
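Finally, the union-find structure in `unionfind.rs` above can be exercised on its own. A small hypothetical driver (using `Id::new` from `cranelift_entity::EntityRef`) shows the canonicalize-toward-lower-ids policy and the chain-shortening find:

```rust
use cranelift_egraph::{Id, UnionFind};
use cranelift_entity::EntityRef;

fn main() {
    let mut uf = UnionFind::new();
    let ids: Vec<Id> = (0..4usize).map(Id::new).collect();
    for &id in &ids {
        uf.add(id);
    }

    // Merge {1, 3}, then {0, 1}: the canonical id is always the
    // lowest id in the merged class.
    uf.union(ids[1], ids[3]);
    uf.union(ids[0], ids[1]);
    assert_eq!(uf.find(ids[3]), ids[0]);

    // The mutating find gives the same answer but also shortens the
    // parent chain it walked, speeding up later queries.
    assert_eq!(uf.find_and_update(ids[3]), ids[0]);
}
```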