Add the aegraph (acyclic e-graph) implementation crate. (#4909)

* Add the aegraph (acyclic egraph) implementation crate. * fix crate-dep version for cranelift-entity (rebase error) * Review feedback. * Fix link in Markdown doc comment. * Doc link fix again. * add cranelift-egraph to publish list.
2022-09-21 17:33:27 -07:00
parent b652ce2fb1
commit 89abd80c3c
8 changed files with 1527 additions and 2 deletions
--- a/cranelift/egraph/Cargo.toml
+++ b/cranelift/egraph/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+authors = ["The Cranelift Project Developers"]
+name = "cranelift-egraph"
+version = "0.89.0"
+description = "acyclic-egraph (aegraph) implementation for Cranelift"
+license = "Apache-2.0 WITH LLVM-exception"
+documentation = "https://docs.rs/cranelift-egraph"
+repository = "https://github.com/bytecodealliance/wasmtime"
+edition = "2021"
+
+[dependencies]
+cranelift-entity = { path = "../entity", version = "0.89.0" }
+log = { version = "0.4.6", default-features = false }
+smallvec = { version = "1.6.1" }
+indexmap = { version = "1.9.1" }
+hashbrown = { version = "0.12.2", features = ["raw"] }
+fxhash = "0.2.1"
+
+[features]
+default = []
+
+# Enable detailed trace-level debug logging. Excluded by default to
+# omit the dynamic overhead of checking the logging level.
+trace-log = []
--- a/cranelift/egraph/src/bumpvec.rs
+++ b/cranelift/egraph/src/bumpvec.rs
@@ -0,0 +1,524 @@
+//! Vectors allocated in arenas, with small per-vector overhead.
+
+use std::marker::PhantomData;
+use std::mem::MaybeUninit;
+use std::ops::Range;
+
+/// A vector of `T` stored within a `BumpArena`.
+///
+/// This is something like a normal `Vec`, except that all accesses
+/// and updates require a separate borrow of the `BumpArena`. This, in
+/// turn, makes the Vec itself very compact: only three `u32`s (12
+/// bytes). The `BumpSlice` variant is only two `u32`s (8 bytes) and
+/// is sufficient to reconstruct a slice, but not grow the vector.
+///
+/// The `BumpVec` does *not* implement `Clone` or `Copy`; it
+/// represents unique ownership of a range of indices in the arena. If
+/// dropped, those indices will be unavailable until the arena is
+/// freed. This is "fine" (it is normally how arena allocation
+/// works). To explicitly release the indices so that later
+/// allocations can reuse them, a very rudimentary reuse mechanism
+/// exists via `BumpVec::free(arena)`. (The allocation path
+/// opportunistically checks the most recently freed range on the
+/// freelist, and can carve off a piece of it if larger than needed,
+/// but it does not attempt to traverse the entire freelist; this is a
+/// compromise between bump-allocation speed and memory efficiency,
+/// which also influences speed through cached-memory reuse.)
+///
+/// The type `T` should not have a `Drop` implementation. This
+/// typically means that it does not own any boxed memory,
+/// sub-collections, or other resources. This is important for the
+/// efficiency of the data structure (otherwise, to call `Drop` impls,
+/// the arena needs to track which indices are live or dead; the
+/// BumpVec itself cannot do the drop because it does not retain a
+/// reference to the arena). Note that placing a `T` with a `Drop`
+/// impl in the arena is still *safe*, because leaking (that is, never
+/// calling `Drop::drop()`) is safe. It is merely less efficient, and
+/// so should be avoided if possible.
+#[derive(Debug)]
+pub struct BumpVec<T> {
+    /// Index of the first element within the arena's backing storage.
+    base: u32,
+    /// Number of initialized elements, starting at `base`.
+    len: u32,
+    /// Number of arena indices owned by this vector, starting at
+    /// `base`; always at least `len`.
+    cap: u32,
+    /// Records the element type, which is otherwise unused by the
+    /// handle itself.
+    _phantom: PhantomData<T>,
+}
+
+/// A slice in an arena: like a `BumpVec`, but has a fixed size that
+/// cannot grow. The size of this struct is one 32-bit word smaller
+/// than `BumpVec`. It is copyable/cloneable (for any `T`) because it
+/// will never be freed.
+#[derive(Debug)]
+pub struct BumpSlice<T> {
+    /// Index of the first element within the arena's backing storage.
+    base: u32,
+    /// Number of initialized elements, starting at `base`.
+    len: u32,
+    /// Records the element type, which is otherwise unused by the
+    /// handle itself.
+    _phantom: PhantomData<T>,
+}
+
+// `Clone` and `Copy` are implemented by hand rather than derived:
+// deriving would add `T: Clone` / `T: Copy` bounds, but the handle is
+// only a pair of indices and can be copied regardless of `T`.
+impl<T> Clone for BumpSlice<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T> Copy for BumpSlice<T> {}
+
+/// The backing storage for `BumpVec`s and `BumpSlice`s of `T`.
+///
+/// Storage is handed out by appending to one growable vector ("bump"
+/// allocation); ranges that are explicitly freed are remembered on a
+/// freelist so that later allocations may reuse them.
+pub struct BumpArena<T> {
+    /// Element storage. A slot holds a valid `T` only while some
+    /// `BumpVec`/`BumpSlice` covers it with its length.
+    vec: Vec<MaybeUninit<T>>,
+    /// Index ranges that have been returned for reuse. Only the last
+    /// entry is consulted when allocating.
+    freelist: Vec<Range<u32>>,
+}
+
+// Implemented by hand rather than derived: `#[derive(Default)]` would
+// add an unnecessary `T: Default` bound.
+impl<T> Default for BumpArena<T> {
+    fn default() -> Self {
+        Self {
+            vec: Vec::new(),
+            freelist: Vec::new(),
+        }
+    }
+}
+
+impl<T> BumpArena<T> {
+    /// Create an empty arena from which `BumpVec`s can be allocated.
+    /// No storage is reserved up front.
+    pub fn new() -> Self {
+        let vec = Vec::new();
+        let freelist = Vec::new();
+        Self { vec, freelist }
+    }
+
+    /// Create an empty arena whose backing storage has room reserved
+    /// for `cap` elements of `T` in total.
+    pub fn arena_with_capacity(cap: usize) -> Self {
+        // The freelist gets room for one entry per 16 elements of
+        // requested capacity.
+        let freelist_cap = cap / 16;
+        let vec = Vec::with_capacity(cap);
+        let freelist = Vec::with_capacity(freelist_cap);
+        Self { vec, freelist }
+    }
+
+    /// Create a new `BumpVec` with the given pre-allocated capacity
+    /// and zero length.
+    ///
+    /// The storage is carved out of the freelist when its most recent
+    /// entry is large enough; otherwise the arena is extended by `cap`
+    /// uninitialized slots.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `cap`, or the resulting arena size, does not fit in a
+    /// `u32`.
+    pub fn vec_with_capacity(&mut self, cap: usize) -> BumpVec<T> {
+        let cap = u32::try_from(cap).expect("BumpVec capacity must fit in u32");
+        let base = match self.maybe_freelist_alloc(cap) {
+            Some(range) => range.start,
+            None => {
+                // Checked conversions: a silently truncated base would
+                // make this vector alias indices owned by another one.
+                let base =
+                    u32::try_from(self.vec.len()).expect("arena size must fit in u32");
+                let end = base.checked_add(cap).expect("arena size must fit in u32");
+                self.vec.resize_with(end as usize, MaybeUninit::uninit);
+                base
+            }
+        };
+        BumpVec {
+            base,
+            len: 0,
+            cap,
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Create a new `BumpVec` with a single element. The capacity is
+    /// also only one element; growing the vector further requires
+    /// either extending the arena in place (when the vector sits at
+    /// the end of the arena) or a reallocation.
+    pub fn single(&mut self, t: T) -> BumpVec<T> {
+        let mut vec = self.vec_with_capacity(1);
+        // Safety: `vec.base` is the first index of the capacity-1
+        // range allocated just above, and the vector's length is
+        // still zero, so the slot holds no live value.
+        unsafe {
+            self.write_into_index(vec.base, t);
+        }
+        // The slot is initialized now; extend the length to cover it.
+        vec.len = 1;
+        vec
+    }
+
+    /// Create a new `BumpVec` with the sequence from an iterator.
+    ///
+    /// The elements are always appended at the end of the arena (the
+    /// freelist is not consulted), and the capacity equals the number
+    /// of elements produced.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the arena size before or after the append does not
+    /// fit in a `u32`.
+    pub fn from_iter<I: Iterator<Item = T>>(&mut self, i: I) -> BumpVec<T> {
+        // Checked conversions: a silently truncated index would make
+        // this vector alias indices owned by another one.
+        let base = u32::try_from(self.vec.len()).expect("arena size must fit in u32");
+        self.vec.extend(i.map(MaybeUninit::new));
+        let end = u32::try_from(self.vec.len()).expect("arena size must fit in u32");
+        let len = end - base;
+        BumpVec {
+            base,
+            len,
+            cap: len,
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Concatenate `a` and `b` into one `BumpVec`, consuming both.
+    /// When the unused capacity after `a`'s elements can hold all of
+    /// `b`, the elements of `b` are moved there; otherwise both are
+    /// moved into newly allocated space.
+    pub fn append(&mut self, a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
+        let spare = a.cap - a.len;
+        if b.len > spare {
+            return self.append_into_new(a, b);
+        }
+        self.append_into_cap(a, b)
+    }
+
+    /// Helper: read the `T` out of a given arena index. After
+    /// reading, that index becomes uninitialized.
+    ///
+    /// # Safety
+    ///
+    /// `index` must currently hold an initialized `T`, and the caller
+    /// must not treat the slot as initialized afterwards (the value
+    /// is bitwise-copied out, so using both copies would duplicate
+    /// it). An out-of-range `index` panics on the slice access.
+    unsafe fn read_out_of_index(&self, index: u32) -> T {
+        // Note that we don't actually *track* uninitialized status
+        // (and this is fine because we will never `Drop` and we never
+        // allow a `BumpVec` to refer to an uninitialized index, so
+        // the bits are effectively dead). We simply read the bits out
+        // and return them.
+        self.vec[index as usize].as_ptr().read()
+    }
+
+    /// Helper: write a `T` into the given arena index. Index must
+    /// have been uninitialized previously.
+    ///
+    /// # Safety
+    ///
+    /// The slot at `index` is overwritten without dropping any
+    /// previous contents, so it must not hold a value that is still
+    /// considered live. An out-of-range `index` panics on the slice
+    /// access.
+    unsafe fn write_into_index(&mut self, index: u32, t: T) {
+        self.vec[index as usize].as_mut_ptr().write(t);
+    }
+
+    /// Helper: move a `T` from one index to another. Old index
+    /// becomes uninitialized and new index must have previously been
+    /// uninitialized.
+    ///
+    /// # Safety
+    ///
+    /// `from` must hold an initialized `T` and `to` must not hold a
+    /// live value; afterwards the caller must treat `from` as
+    /// uninitialized and `to` as initialized.
+    unsafe fn move_item(&mut self, from: u32, to: u32) {
+        let item = self.read_out_of_index(from);
+        self.write_into_index(to, item);
+    }
+
+    /// Helper: push a `T` onto the end of the arena, growing its
+    /// storage. The `T` to push is read out of another index, and
+    /// that index subsequently becomes uninitialized. Returns the
+    /// index at which the item was placed.
+    ///
+    /// # Safety
+    ///
+    /// `from` must hold an initialized `T`, and the caller must treat
+    /// it as uninitialized afterwards.
+    unsafe fn push_item(&mut self, from: u32) -> u32 {
+        // The new item lands at the current end of the backing vector.
+        let index = self.vec.len() as u32;
+        let item = self.read_out_of_index(from);
+        self.vec.push(MaybeUninit::new(item));
+        index
+    }
+
+    /// Helper: append `b` into the capacity at the end of `a`.
+    ///
+    /// Requires that `a` has at least `b.len` unused slots (checked
+    /// only by a debug assertion). `b`'s storage is handed to the
+    /// freelist once its elements have been moved out.
+    fn append_into_cap(&mut self, mut a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
+        debug_assert!(a.cap - a.len >= b.len);
+        for i in 0..b.len {
+            // Safety: initially, the indices in `b` are initialized;
+            // the indices in `a`'s cap, beyond its length, are
+            // uninitialized. We move the initialized contents from
+            // `b` to the tail beyond `a`, and we consume `b` (so it
+            // no longer exists), and we update `a`'s length to cover
+            // the initialized contents in their new location.
+            unsafe {
+                self.move_item(b.base + i, a.base + a.len + i);
+            }
+        }
+        a.len += b.len;
+        // `b` no longer holds any live elements; recycle its range.
+        b.free(self);
+        a
+    }
+
+    /// Helper: return a range of indices that are available
+    /// (uninitialized) according to the freelist for `len` elements,
+    /// if possible.
+    ///
+    /// Only the last freelist entry is examined; if it is too small,
+    /// `None` is returned even when an earlier entry would fit.
+    fn maybe_freelist_alloc(&mut self, len: u32) -> Option<Range<u32>> {
+        if let Some(entry) = self.freelist.last_mut() {
+            if entry.len() >= len as usize {
+                // Carve the allocation off the front of the entry.
+                let base = entry.start;
+                entry.start += len;
+                // Remove the entry once it has been used up entirely.
+                if entry.start == entry.end {
+                    self.freelist.pop();
+                }
+                return Some(base..(base + len));
+            }
+        }
+        None
+    }
+
+    /// Helper: append `a` and `b` into a completely new allocation.
+    ///
+    /// The new allocation's capacity is the combined length rounded
+    /// up to a power of two. The storage of both `a` and `b` is handed
+    /// to the freelist once their elements have been moved out.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the combined length does not fit in a `u32`.
+    fn append_into_new(&mut self, a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
+        let len = a
+            .len
+            .checked_add(b.len)
+            .expect("combined BumpVec length must fit in u32");
+        // New capacity: round up to a power of two.
+        let cap = round_up_power_of_two(len);
+
+        // Obtain `cap` uninitialized slots, either from the freelist
+        // or by extending the arena. Only the base index is needed;
+        // the temporary handle itself is discarded.
+        let dest = self.vec_with_capacity(cap as usize).base;
+
+        for i in 0..a.len {
+            // Safety: the indices in `a` must be initialized. We read
+            // out the item and copy it to a new, uninitialized index;
+            // the old index is no longer covered by a BumpVec, because
+            // we consume `a`.
+            unsafe {
+                self.move_item(a.base + i, dest + i);
+            }
+        }
+        for i in 0..b.len {
+            // Safety: the indices in `b` must be initialized. We read
+            // out the item and copy it to a new, uninitialized index;
+            // the old index is no longer covered by a BumpVec, because
+            // we consume `b`.
+            unsafe {
+                self.move_item(b.base + i, dest + a.len + i);
+            }
+        }
+
+        a.free(self);
+        b.free(self);
+
+        BumpVec {
+            base: dest,
+            len,
+            cap,
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Returns the size of the backing `Vec`, in elements. This
+    /// counts every slot the arena has handed out so far, including
+    /// unused capacity and ranges sitting on the freelist.
+    pub fn size(&self) -> usize {
+        self.vec.len()
+    }
+}
+
+/// Round `x` up to the nearest power of two; a value that already is
+/// a power of two is returned unchanged.
+///
+/// `x` is expected to be nonzero (checked by a debug assertion).
+///
+/// # Panics
+///
+/// Panics if the rounded-up value does not fit in a `u32`, rather
+/// than wrapping to a too-small capacity in release builds.
+fn round_up_power_of_two(x: u32) -> u32 {
+    debug_assert!(x > 0);
+    x.checked_next_power_of_two()
+        .expect("rounded-up capacity must fit in u32")
+}
+
+impl<T> BumpVec<T> {
+    /// Returns a slice view of this `BumpVec`, given a borrow of the
+    /// arena.
+    ///
+    /// The arena must be the one this vector was allocated from.
+    /// Panics if the vector's range lies outside the arena's storage.
+    pub fn as_slice<'a>(&'a self, arena: &'a BumpArena<T>) -> &'a [T] {
+        // Compute the bounds in `usize` so the addition cannot wrap.
+        let start = self.base as usize;
+        let end = start + self.len as usize;
+        let maybe_uninit_slice: &'a [MaybeUninit<T>] = &arena.vec[start..end];
+        // Safety: the index range we represent must be initialized,
+        // and `MaybeUninit<T>` has the same layout as `T`, so the
+        // slice can be reinterpreted element-for-element.
+        unsafe { &*(maybe_uninit_slice as *const [MaybeUninit<T>] as *const [T]) }
+    }
+
+    /// Returns a mutable slice view of this `BumpVec`, given a
+    /// mutable borrow of the arena.
+    ///
+    /// The arena must be the one this vector was allocated from.
+    /// Panics if the vector's range lies outside the arena's storage.
+    pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena<T>) -> &'a mut [T] {
+        // Compute the bounds in `usize` so the addition cannot wrap.
+        let start = self.base as usize;
+        let end = start + self.len as usize;
+        let maybe_uninit_slice: &'a mut [MaybeUninit<T>] = &mut arena.vec[start..end];
+        // Safety: the index range we represent must be initialized,
+        // and `MaybeUninit<T>` has the same layout as `T`, so the
+        // slice can be reinterpreted element-for-element.
+        unsafe { &mut *(maybe_uninit_slice as *mut [MaybeUninit<T>] as *mut [T]) }
+    }
+
+    /// Returns the length of this vector (the number of initialized
+    /// elements). Does not require access to the arena.
+    pub fn len(&self) -> usize {
+        self.len as usize
+    }
+
+    /// Returns the capacity of this vector (the number of arena
+    /// indices it owns, used or not). Does not require access to the
+    /// arena.
+    pub fn cap(&self) -> usize {
+        self.cap as usize
+    }
+
+    /// Reserve `extra_len` capacity at the end of the vector,
+    /// reallocating if necessary.
+    ///
+    /// Nothing happens when the unused capacity already covers
+    /// `extra_len`. Otherwise the capacity grows by `extra_len`: in
+    /// place when the vector sits at the very end of the arena, or by
+    /// moving the elements to a new allocation, in which case the old
+    /// storage is handed to the freelist for reuse.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `extra_len` does not fit in a `u32`.
+    pub fn reserve(&mut self, extra_len: usize, arena: &mut BumpArena<T>) {
+        let extra_len = u32::try_from(extra_len).unwrap();
+        if self.cap - self.len >= extra_len {
+            return;
+        }
+        if (self.base + self.cap) as usize == arena.vec.len() {
+            // The vector ends exactly where the arena ends: extend the
+            // arena in place.
+            for _ in 0..extra_len {
+                arena.vec.push(MaybeUninit::uninit());
+            }
+            self.cap += extra_len;
+        } else {
+            let new_cap = self.cap + extra_len;
+            let new = arena.vec_with_capacity(new_cap as usize);
+            // Safety: the first `self.len` indices of this vector are
+            // initialized, and the freshly allocated range is not;
+            // after the move only the new range is referenced.
+            unsafe {
+                for i in 0..self.len {
+                    arena.move_item(self.base + i, new.base + i);
+                }
+            }
+            // The old range holds no live elements any more; make it
+            // available for reuse instead of leaking it. Empty ranges
+            // are skipped because they would only shadow useful
+            // freelist entries.
+            if self.cap > 0 {
+                arena.freelist.push(self.base..(self.base + self.cap));
+            }
+            self.base = new.base;
+            self.cap = new.cap;
+        }
+    }
+
+    /// Push an item, growing the capacity if needed.
+    ///
+    /// Growth happens in place, one slot at a time, while the vector
+    /// sits at the very end of the arena; otherwise the capacity is
+    /// raised to the next power of two via `reserve`.
+    pub fn push(&mut self, t: T, arena: &mut BumpArena<T>) {
+        if self.cap > self.len {
+            // Safety: `len < cap`, so the slot just past the current
+            // length belongs to this vector and holds no live value.
+            unsafe {
+                arena.write_into_index(self.base + self.len, t);
+            }
+            self.len += 1;
+        } else if (self.base + self.cap) as usize == arena.vec.len() {
+            // Full, but the vector ends exactly where the arena ends:
+            // append directly to the arena's storage.
+            arena.vec.push(MaybeUninit::new(t));
+            self.cap += 1;
+            self.len += 1;
+        } else {
+            // Full and not at the end of the arena: grow the capacity
+            // to the next power of two.
+            let new_cap = round_up_power_of_two(self.cap + 1);
+            let extra = new_cap - self.cap;
+            self.reserve(extra as usize, arena);
+            // Safety: `reserve` has made at least one unused slot
+            // available past the current length.
+            unsafe {
+                arena.write_into_index(self.base + self.len, t);
+            }
+            self.len += 1;
+        }
+    }
+
+    /// Produce an independent copy of this vector in the same arena by
+    /// cloning each element; available when `T: Clone`. The copy is
+    /// allocated with capacity for exactly the current length.
+    pub fn clone(&self, arena: &mut BumpArena<T>) -> BumpVec<T>
+    where
+        T: Clone,
+    {
+        let count = self.len();
+        let mut copy = arena.vec_with_capacity(count);
+        for idx in 0..count {
+            // Re-borrow the slice on every iteration: the push below
+            // needs the arena mutably.
+            let elem = self.as_slice(arena)[idx].clone();
+            copy.push(elem, arena);
+        }
+        copy
+    }
+
+    /// Truncate the length to a smaller-or-equal length. The capacity
+    /// is unchanged, and the elements past the new length are simply
+    /// forgotten (they are not dropped).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` is greater than the current length.
+    pub fn truncate(&mut self, len: usize) {
+        // Compare in `usize`: narrowing `len` first could wrap an
+        // oversized value into range and let it pass the check.
+        assert!(len <= self.len as usize);
+        // Lossless: `len` is at most `self.len`, which is a `u32`.
+        self.len = len as u32;
+    }
+
+    /// Consume the BumpVec and return its indices to a free pool in
+    /// the arena.
+    ///
+    /// A vector with zero capacity owns no indices, so nothing is
+    /// recorded for it: allocation only inspects the most recently
+    /// freed range, and an empty range there would block reuse of
+    /// every earlier one.
+    pub fn free(self, arena: &mut BumpArena<T>) {
+        if self.cap > 0 {
+            arena.freelist.push(self.base..(self.base + self.cap));
+        }
+    }
+
+    /// Freeze the capacity of this BumpVec, turning it into a slice,
+    /// for a smaller struct (8 bytes rather than 12). Once this
+    /// exists, it is copyable, because the slice will never be freed.
+    ///
+    /// Any unused capacity past the current length is handed to the
+    /// arena's freelist.
+    pub fn freeze(self, arena: &mut BumpArena<T>) -> BumpSlice<T> {
+        if self.cap > self.len {
+            // Give back the unused tail; the slice keeps only the
+            // initialized prefix.
+            arena
+                .freelist
+                .push((self.base + self.len)..(self.base + self.cap));
+        }
+        BumpSlice {
+            base: self.base,
+            len: self.len,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T> BumpSlice<T> {
+    /// Returns a slice view of the `BumpSlice`, given a borrow of the
+    /// arena.
+    ///
+    /// The arena must be the one this slice was allocated from.
+    /// Panics if the slice's range lies outside the arena's storage.
+    pub fn as_slice<'a>(&'a self, arena: &'a BumpArena<T>) -> &'a [T] {
+        // Compute the bounds in `usize` so the addition cannot wrap.
+        let start = self.base as usize;
+        let end = start + self.len as usize;
+        let maybe_uninit_slice: &'a [MaybeUninit<T>] = &arena.vec[start..end];
+        // Safety: the index range we represent must be initialized,
+        // and `MaybeUninit<T>` has the same layout as `T`, so the
+        // slice can be reinterpreted element-for-element.
+        unsafe { &*(maybe_uninit_slice as *const [MaybeUninit<T>] as *const [T]) }
+    }
+
+    /// Returns a mutable slice view of the `BumpSlice`, given a
+    /// mutable borrow of the arena.
+    ///
+    /// The arena must be the one this slice was allocated from.
+    /// Panics if the slice's range lies outside the arena's storage.
+    pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena<T>) -> &'a mut [T] {
+        // Compute the bounds in `usize` so the addition cannot wrap.
+        let start = self.base as usize;
+        let end = start + self.len as usize;
+        let maybe_uninit_slice: &'a mut [MaybeUninit<T>] = &mut arena.vec[start..end];
+        // Safety: the index range we represent must be initialized,
+        // and `MaybeUninit<T>` has the same layout as `T`, so the
+        // slice can be reinterpreted element-for-element.
+        unsafe { &mut *(maybe_uninit_slice as *mut [MaybeUninit<T>] as *mut [T]) }
+    }
+
+    /// Returns the length of the `BumpSlice` (its number of
+    /// elements). Does not require access to the arena.
+    pub fn len(&self) -> usize {
+        self.len as usize
+    }
+}
+
+/// The default `BumpVec` is empty and owns no arena indices, so it
+/// can be created without access to an arena.
+impl<T> Default for BumpVec<T> {
+    fn default() -> Self {
+        let (base, len, cap) = (0, 0, 0);
+        Self {
+            base,
+            len,
+            cap,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// The default `BumpSlice` is empty, so it can be created without
+/// access to an arena.
+impl<T> Default for BumpSlice<T> {
+    fn default() -> Self {
+        let (base, len) = (0, 0);
+        Self {
+            base,
+            len,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Powers of two map to themselves; everything else rounds up.
+    #[test]
+    fn test_round_up() {
+        assert_eq!(1, round_up_power_of_two(1));
+        assert_eq!(2, round_up_power_of_two(2));
+        assert_eq!(4, round_up_power_of_two(3));
+        assert_eq!(4, round_up_power_of_two(4));
+        assert_eq!(32, round_up_power_of_two(24));
+        assert_eq!(0x8000_0000, round_up_power_of_two(0x7fff_ffff));
+    }
+
+    /// Walks through allocation, appending, freelist reuse, in-place
+    /// growth, and relocation, tracking the arena size throughout.
+    #[test]
+    fn test_basic() {
+        let mut arena: BumpArena<u32> = BumpArena::new();
+
+        let a = arena.single(1);
+        let b = arena.single(2);
+        let c = arena.single(3);
+        // `a` has no spare capacity, so the result is a new
+        // allocation and the ranges of `a` and `b` are freed.
+        let ab = arena.append(a, b);
+        assert_eq!(ab.as_slice(&arena), &[1, 2]);
+        assert_eq!(ab.cap(), 2);
+        let abc = arena.append(ab, c);
+        assert_eq!(abc.len(), 3);
+        // Three elements round up to a capacity of four.
+        assert_eq!(abc.cap(), 4);
+        assert_eq!(abc.as_slice(&arena), &[1, 2, 3]);
+        // 3 singles + 2 slots for `ab` + 4 slots for `abc`.
+        assert_eq!(arena.size(), 9);
+        let mut d = arena.single(4);
+        // Should have reused the freelist.
+        assert_eq!(arena.size(), 9);
+        assert_eq!(d.len(), 1);
+        assert_eq!(d.cap(), 1);
+        assert_eq!(d.as_slice(&arena), &[4]);
+        d.as_mut_slice(&mut arena)[0] = 5;
+        assert_eq!(d.as_slice(&arena), &[5]);
+        abc.free(&mut arena);
+        let d2 = d.clone(&mut arena);
+        let dd = arena.append(d, d2);
+        // Should have reused the freelist.
+        assert_eq!(arena.size(), 9);
+        assert_eq!(dd.as_slice(&arena), &[5, 5]);
+        // `from_iter` always appends at the end of the arena, so `e`
+        // can subsequently grow in place.
+        let mut e = arena.from_iter([10, 11, 12].into_iter());
+        e.push(13, &mut arena);
+        assert_eq!(arena.size(), 13);
+        e.reserve(4, &mut arena);
+        assert_eq!(arena.size(), 17);
+        let _f = arena.from_iter([1, 2, 3, 4, 5, 6, 7, 8].into_iter());
+        assert_eq!(arena.size(), 25);
+        e.reserve(8, &mut arena);
+        assert_eq!(e.cap(), 16);
+        assert_eq!(e.as_slice(&arena), &[10, 11, 12, 13]);
+        // `e` must have been copied now that `f` is at the end of the
+        // arena.
+        assert_eq!(arena.size(), 41);
+    }
+}
--- a/cranelift/egraph/src/ctxhash.rs
+++ b/cranelift/egraph/src/ctxhash.rs
@@ -0,0 +1,280 @@
+//! A hashmap with "external hashing": nodes are hashed or compared for
+//! equality only with some external context provided on lookup/insert.
+//! This allows very memory-efficient data structures where
+//! node-internal data references some other storage (e.g., offsets into
+//! an array or pool of shared data).
+
+use super::unionfind::UnionFind;
+use hashbrown::raw::{Bucket, RawTable};
+use std::hash::{Hash, Hasher};
+use std::marker::PhantomData;
+
+/// Trait that allows for equality comparison given some external
+/// context.
+///
+/// `V1` and `V2` are the types of the two values being compared; they
+/// may differ.
+///
+/// Note that this trait is implemented by the *context*, rather than
+/// the item type, for somewhat complex lifetime reasons (lack of GATs
+/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
+/// the value type).
+///
+/// Furthermore, the `ctx_eq` method includes a `UnionFind` parameter,
+/// because in practice we require this and a borrow to it cannot be
+/// included in the context type without GATs (similarly to above).
+pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
+    /// Determine whether `a` and `b` are equal, given the context in
+    /// `self` and the union-find data structure `uf`.
+    fn ctx_eq(&self, a: &V1, b: &V2, uf: &mut UnionFind) -> bool;
+}
+
+/// Trait that allows for hashing given some external context.
+///
+/// Like `CtxEq`, this is implemented by the context rather than by
+/// the value type. Values that compare equal under `ctx_eq` need to
+/// produce the same hash: `CtxHashMap` locates candidates by hash
+/// before it tests them for equality.
+pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
+    /// Compute the hash of `value`, given the context in `self` and
+    /// the union-find data structure `uf`.
+    fn ctx_hash(&self, value: &Value, uf: &mut UnionFind) -> u64;
+}
+
+/// A null-comparator context type for underlying value types that
+/// already have `Eq` and `Hash`. It carries no state: comparison and
+/// hashing are delegated to the value's own implementations.
+#[derive(Default)]
+pub struct NullCtx;
+
+/// Equality via the value type's own `Eq`; the union-find is unused.
+impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
+    fn ctx_eq(&self, a: &V, b: &V, _: &mut UnionFind) -> bool {
+        a == b
+    }
+}
+/// Hashing via the value type's own `Hash`, fed through an
+/// `FxHasher`; the union-find is unused.
+impl<V: Eq + Hash> CtxHash<V> for NullCtx {
+    fn ctx_hash(&self, value: &V, _: &mut UnionFind) -> u64 {
+        let mut hasher = fxhash::FxHasher::default();
+        value.hash(&mut hasher);
+        hasher.finish()
+    }
+}
+
+/// A bucket in the hash table.
+///
+/// Some performance-related design notes: we cache the hashcode for
+/// speed, as this often buys a few percent speed in
+/// interning-table-heavy workloads. We only keep the low 32 bits of
+/// the hashcode, for memory efficiency: in common use, `K` and `V`
+/// are often 32 bits also, and a 12-byte bucket is measurably better
+/// than a 16-byte bucket.
+struct BucketData<K, V> {
+    /// Low 32 bits of the key's context hash, cached so that lookups
+    /// can reject mismatches cheaply and rehashing needs no context.
+    hash: u32,
+    /// The key.
+    k: K,
+    /// The value associated with the key.
+    v: V,
+}
+
+/// A HashMap that takes external context for all operations: keys
+/// are hashed and compared through a context value passed to each
+/// call rather than through `Hash`/`Eq` impls on `K`.
+pub struct CtxHashMap<K, V> {
+    /// Underlying raw table; each bucket stores the key, the value,
+    /// and the cached (truncated) hash.
+    raw: RawTable<BucketData<K, V>>,
+}
+
+impl<K, V> CtxHashMap<K, V> {
+    /// Create an empty hashmap.
+    pub fn new() -> Self {
+        Self {
+            raw: RawTable::new(),
+        }
+    }
+
+    /// Create an empty hashmap with pre-allocated space for the given
+    /// capacity.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            raw: RawTable::with_capacity(capacity),
+        }
+    }
+}
+
+/// The default map is empty, exactly like `CtxHashMap::new()`. No
+/// bounds on `K` or `V` are needed.
+impl<K, V> Default for CtxHashMap<K, V> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<K, V> CtxHashMap<K, V> {
+    /// Insert a new key-value pair, returning the old value associated
+    /// with this key (if any).
+    ///
+    /// When an equal key (according to `ctx`) is already present, only
+    /// the value is replaced; the stored key is kept and `k` is
+    /// dropped.
+    pub fn insert<Ctx: CtxEq<K, K> + CtxHash<K>>(
+        &mut self,
+        k: K,
+        v: V,
+        ctx: &Ctx,
+        uf: &mut UnionFind,
+    ) -> Option<V> {
+        // Only the low 32 bits of the hash are kept; the same
+        // truncated value is used both for probing and for the cached
+        // per-bucket hash.
+        let hash = ctx.ctx_hash(&k, uf) as u32;
+        match self.raw.find(hash as u64, |bucket| {
+            // Compare the cached hashes first; the context comparison
+            // only runs when they match.
+            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
+        }) {
+            Some(bucket) => {
+                // Safety: the bucket was just returned by `find` on
+                // this table, which we borrow mutably.
+                let data = unsafe { bucket.as_mut() };
+                Some(std::mem::replace(&mut data.v, v))
+            }
+            None => {
+                let data = BucketData { hash, k, v };
+                // Existing buckets are rehashed from their cached
+                // hash, so no context is needed if the table grows.
+                self.raw
+                    .insert_entry(hash as u64, data, |bucket| bucket.hash as u64);
+                None
+            }
+        }
+    }
+
+    /// Look up a key, returning a borrow of the value if present.
+    ///
+    /// The lookup key may be of a different type `Q` than the stored
+    /// keys, as long as the context can hash a `Q` and compare a
+    /// stored `K` against it.
+    pub fn get<'a, Q, Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>>(
+        &'a self,
+        k: &Q,
+        ctx: &Ctx,
+        uf: &mut UnionFind,
+    ) -> Option<&'a V> {
+        // Truncate to the low 32 bits, matching the cached hashes.
+        let hash = ctx.ctx_hash(k, uf) as u32;
+        self.raw
+            .find(hash as u64, |bucket| {
+                hash == bucket.hash && ctx.ctx_eq(&bucket.k, k, uf)
+            })
+            .map(|bucket| {
+                // Safety: the bucket was just returned by `find` on
+                // this table, which stays borrowed for `'a`.
+                let data = unsafe { bucket.as_ref() };
+                &data.v
+            })
+    }
+
+    /// Return an Entry cursor on a given bucket for a key, allowing
+    /// for fetching the current value or inserting a new one.
+    ///
+    /// The key is hashed and looked up once here; a vacant entry
+    /// keeps the key and its hash so that a later insert needs
+    /// neither the context nor the union-find.
+    pub fn entry<'a, Ctx: CtxEq<K, K> + CtxHash<K>>(
+        &'a mut self,
+        k: K,
+        ctx: &'a Ctx,
+        uf: &mut UnionFind,
+    ) -> Entry<'a, K, V> {
+        // Truncate to the low 32 bits, matching the cached hashes.
+        let hash = ctx.ctx_hash(&k, uf) as u32;
+        match self.raw.find(hash as u64, |bucket| {
+            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
+        }) {
+            // An equal key is already present; `k` is dropped.
+            Some(bucket) => Entry::Occupied(OccupiedEntry {
+                bucket,
+                _phantom: PhantomData,
+            }),
+            None => Entry::Vacant(VacantEntry {
+                raw: &mut self.raw,
+                hash,
+                key: k,
+            }),
+        }
+    }
+}
+
+/// An entry in the hashmap, as returned by `CtxHashMap::entry`.
+pub enum Entry<'a, K: 'a, V> {
+    /// An equal key is already present in the map.
+    Occupied(OccupiedEntry<'a, K, V>),
+    /// The key is absent; a value can be inserted for it.
+    Vacant(VacantEntry<'a, K, V>),
+}
+
+/// An occupied entry: a handle on the bucket that holds an existing
+/// key and its value.
+pub struct OccupiedEntry<'a, K, V> {
+    /// The raw bucket found for the key.
+    bucket: Bucket<BucketData<K, V>>,
+    /// Carries the lifetime `'a` of the map borrow this entry was
+    /// created from.
+    _phantom: PhantomData<&'a ()>,
+}
+
+impl<'a, K: 'a, V> OccupiedEntry<'a, K, V> {
+    /// Get the value stored for this entry's key.
+    pub fn get(&self) -> &'a V {
+        // Safety: the bucket was obtained from a lookup on the map
+        // whose borrow `'a` this entry carries.
+        let bucket = unsafe { self.bucket.as_ref() };
+        &bucket.v
+    }
+}
+
+/// A vacant entry: everything needed to insert a value for a key
+/// that is not yet in the map.
+pub struct VacantEntry<'a, K, V> {
+    /// The map's underlying table, into which the value is inserted.
+    raw: &'a mut RawTable<BucketData<K, V>>,
+    /// The key's (truncated) hash, computed when the entry was looked
+    /// up.
+    hash: u32,
+    /// The key to insert.
+    key: K,
+}
+
+impl<'a, K, V> VacantEntry<'a, K, V> {
+    /// Insert a value for this entry's key, returning a borrow of the
+    /// stored value.
+    pub fn insert(self, v: V) -> &'a V {
+        // Reuse the hash computed at lookup time; existing buckets
+        // are rehashed from their cached hash if the table grows.
+        let bucket = self.raw.insert(
+            self.hash as u64,
+            BucketData {
+                hash: self.hash,
+                k: self.key,
+                v,
+            },
+            |bucket| bucket.hash as u64,
+        );
+        // Safety: the bucket was just returned by the insertion into
+        // the table that is borrowed for `'a`.
+        let data = unsafe { bucket.as_ref() };
+        &data.v
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::hash::Hash;
+
+    /// A key that is only an index into the context's `vals`.
+    #[derive(Clone, Copy, Debug)]
+    struct Key {
+        index: u32,
+    }
+    /// Context holding the strings that the keys refer to.
+    struct Ctx {
+        vals: &'static [&'static str],
+    }
+    // Two keys are equal when they refer to equal strings, even if
+    // their indices differ.
+    impl CtxEq<Key, Key> for Ctx {
+        fn ctx_eq(&self, a: &Key, b: &Key, _: &mut UnionFind) -> bool {
+            self.vals[a.index as usize].eq(self.vals[b.index as usize])
+        }
+    }
+    // A key hashes as the string it refers to, consistent with the
+    // equality above.
+    impl CtxHash<Key> for Ctx {
+        fn ctx_hash(&self, value: &Key, _: &mut UnionFind) -> u64 {
+            let mut state = fxhash::FxHasher::default();
+            self.vals[value.index as usize].hash(&mut state);
+            state.finish()
+        }
+    }
+
+    /// Insert and lookup go through the context: `k0` and `k2` both
+    /// refer to "a" and therefore share one map slot.
+    #[test]
+    fn test_basic() {
+        let ctx = Ctx {
+            vals: &["a", "b", "a"],
+        };
+        let mut uf = UnionFind::new();
+
+        let k0 = Key { index: 0 };
+        let k1 = Key { index: 1 };
+        let k2 = Key { index: 2 };
+
+        assert!(ctx.ctx_eq(&k0, &k2, &mut uf));
+        assert!(!ctx.ctx_eq(&k0, &k1, &mut uf));
+        assert!(!ctx.ctx_eq(&k2, &k1, &mut uf));
+
+        let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
+        assert_eq!(map.insert(k0, 42, &ctx, &mut uf), None);
+        // `k2` is equal to `k0`, so this replaces the value.
+        assert_eq!(map.insert(k2, 84, &ctx, &mut uf), Some(42));
+        assert_eq!(map.get(&k1, &ctx, &mut uf), None);
+        assert_eq!(*map.get(&k0, &ctx, &mut uf).unwrap(), 84);
+    }
+
+    /// The entry API reports vacancy and occupancy according to the
+    /// context's notion of equality.
+    #[test]
+    fn test_entry() {
+        let mut ctx = Ctx {
+            vals: &["a", "b", "a"],
+        };
+        let mut uf = UnionFind::new();
+
+        let k0 = Key { index: 0 };
+        let k1 = Key { index: 1 };
+        let k2 = Key { index: 2 };
+
+        let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
+        match map.entry(k0, &mut ctx, &mut uf) {
+            Entry::Vacant(v) => {
+                v.insert(1);
+            }
+            _ => panic!(),
+        }
+        // "b" has not been inserted.
+        match map.entry(k1, &mut ctx, &mut uf) {
+            Entry::Vacant(_) => {}
+            Entry::Occupied(_) => panic!(),
+        }
+        // `k2` refers to "a", which was inserted through `k0`.
+        match map.entry(k2, &mut ctx, &mut uf) {
+            Entry::Occupied(o) => {
+                assert_eq!(*o.get(), 1);
+            }
+            _ => panic!(),
+        }
+    }
+}
--- a/cranelift/egraph/src/lib.rs
+++ b/cranelift/egraph/src/lib.rs
@@ -0,0 +1,613 @@
+//! # ægraph (aegraph, or acyclic e-graph) implementation.
+//!
+//! An aegraph is a form of e-graph. We will first describe the
+//! e-graph, then the aegraph as a slightly less powerful but highly
+//! optimized variant of it.
+//!
+//! The main goal of this library is to be explicitly memory-efficient
+//! and light on allocations. We need to be as fast and as small as
+//! possible in order to minimize impact on compile time in a
+//! production compiler.
+//!
+//! ## The e-graph
+//!
+//! An e-graph, or equivalence graph, is a kind of node-based
+//! intermediate representation (IR) data structure that consists of
+//! *eclasses* and *enodes*. An eclass contains one or more enodes;
+//! semantically an eclass is like a value, and an enode is one way to
+//! compute that value. If several enodes are in one eclass, the data
+//! structure is asserting that any of these enodes, if evaluated,
+//! would produce the value.
+//!
+//! An e-graph also contains a deduplicating hash-map of nodes, so if
+//! the user creates the same e-node more than once, they get the same
+//! e-class ID.
+//!
+//! In the usual use-case, an e-graph is used to build a sea-of-nodes
+//! IR for a function body or other expression-based code, and then
+//! *rewrite rules* are applied to the e-graph. Each rewrite
+//! potentially introduces a new e-node that is equivalent to an
+//! existing e-node, and then unions the two e-nodes' classes
+//! together.
+//!
+//! In the trivial case this results in an e-class containing a series
+//! of e-nodes that are newly added -- all known forms of an
+//! expression -- but note how if a rewrite rule rewrites into an
+//! existing e-node (discovered via deduplication), rewriting can
+//! result in unioning of two e-classes that have existed for some
+//! time.
+//!
+//! An e-graph's enodes refer to *classes* for their arguments, rather
+//! than other nodes directly. This is key to the ability of an
+//! e-graph to canonicalize: when two e-classes that are already used
+//! as arguments by other e-nodes are unioned, all e-nodes that refer
+//! to those e-classes are themselves re-canonicalized. This can
+//! result in "cascading" unioning of eclasses, in a process that
+//! discovers the transitive implications of all individual
+//! equalities. This process is known as "equality saturation".
+//!
+//! ## The acyclic e-graph (aegraph)
+//!
+//! An e-graph is powerful, but it can also be expensive to build and
+//! saturate: there are often many different forms an expression can
+//! take (because many different rewrites are possible), and cascading
+//! canonicalization requires heavyweight data structure bookkeeping
+//! that is expensive to maintain.
+//!
+//! This crate introduces the aegraph: an acyclic e-graph. This data
+//! structure stores an e-class as an *immutable persistent data
+//! structure*. An id can refer to some *level* of an eclass: a
+//! snapshot of the nodes in the eclass at one point in time. The
+//! nodes referred to by this id never change, though the eclass may
+//! grow later.
+//!
+//! A *union* is also an operation that creates a new eclass id: the
+//! original eclass IDs refer to the original eclass contents, while
+//! the id resulting from the `union()` operation refers to an eclass
+//! that has all nodes.
+//!
+//! In order to allow for adequate canonicalization, an enode normally
+//! stores the *latest* eclass id for each argument, but computes
+//! hashes and equality using a *canonical* eclass id. We define such
+//! a canonical id with a union-find data structure, just as for a
+//! traditional e-graph. It is normally the lowest id referring to
+//! part of the eclass.
+//!
+//! The persistent/immutable nature of this data structure yields one
+//! extremely important property: it is acyclic! This simplifies
+//! operation greatly:
+//!
+//! - When "elaborating" out of the e-graph back to linearized code,
+//!   so that we can generate machine code, we do not need to break
+//!   cycles. A given enode cannot indirectly refer back to itself.
+//!
+//! - When applying rewrite rules, the nodes visible from a given id
+//!   for an eclass never change. This means that we only need to
+//!   apply rewrite rules at that node id *once*.
+//!
+//! ## Data Structure and Example
+//!
+//! Each eclass id refers to a table entry that can be one of:
+//!
+//! - A single enode;
+//! - An enode and an earlier eclass id it is appended to;
+//! - A "union node" with two earlier eclass ids.
+//!
+//! Building the aegraph consists solely of adding new entries to the
+//! end of this table. An enode in any given entry can only refer to
+//! earlier eclass ids.
+//!
+//! For example, consider the following eclass table:
+//!
+//! ```plain
+//!
+//!    eclass/enode table
+//!
+//!     eclass1    iconst(1)
+//!     eclass2    blockparam(block0, 0)
+//!     eclass3    iadd(eclass1, eclass2)
+//! ```
+//!
+//! This represents the expression `iadd(iconst(1),
+//! blockparam(block0, 0))` (as the sole enode for eclass3).
+//!
+//! Now, say that as we further build the function body, we add
+//! another enode `iadd(eclass3, iconst(1))`. The `iconst(1)` will be
+//! deduplicated to `eclass1`, and the toplevel `iadd` will become its
+//! own new eclass (`eclass4`).
+//!
+//! ```plain
+//!     eclass4    iadd(eclass3, eclass1)
+//! ```
+//!
+//! Now we apply our body of rewrite rules, and these can combine
+//! `x + 1 + 1` into `x + 2`; so we get:
+//!
+//! ```plain
+//!     eclass5    iconst(2)
+//!     eclass6    union(iadd(eclass2, eclass5), eclass4)
+//! ```
+//!
+//! Note that we added the nodes for the new expression, and then we
+//! union'd it with the earlier `eclass4`. Logically this represents a
+//! single eclass that contains two nodes -- the `x + 1 + 1` and `x +
+//! 2` representations -- and the *latest* id for the eclass,
+//! `eclass6`, can reach all nodes in the eclass (here the node stored
+//! in `eclass6` and the earlier one in `eclass4`).
+//!
+//! ## aegraph vs. egraph
+//!
+//! Where does an aegraph fall short of an e-graph -- or in other
+//! words, why maintain the data structures to allow for full
+//! (re)canonicalization at all, with e.g. parent pointers to
+//! recursively update parents?
+//!
+//! This question deserves further study, but right now, it appears
+//! that the difference is limited to a case like the following:
+//!
+//! - expression E1 is interned into the aegraph.
+//! - expression E2 is interned into the aegraph. It uses E1 as an
+//!   argument to one or more operators, and so refers to the
+//!   (currently) latest id for E1.
+//! - expression E3 is interned into the aegraph. A rewrite rule fires
+//!   that unions E3 with E1.
+//!
+//! In an e-graph, the last action would trigger a re-canonicalization
+//! of all "parents" (users) of E1; so E2 would be re-canonicalized
+//! using an id that represents the union of E1 and E3. At
+//! code-generation time, E2 could choose to use a value computed by
+//! either E1's or E3's operator. In an aegraph, this is not the case:
+//! E2's e-class and e-nodes are immutable once created, so E2 refers
+//! only to E1's representation of the value (a "slice" of the whole
+//! e-class).
+//!
+//! While at first this sounds quite limiting, there actually appears
+//! to be a nice mutually-beneficial interaction with the immediate
+//! application of rewrite rules: by applying all rewrites we know
+//! about right when E1 is interned, E2 can refer to the best version
+//! when it is created. The above scenario only leads to a missed
+//! optimization if:
+//!
+//! - a rewrite rule exists from E3 to E1, but not E1 to E3; and
+//! - E3 is *cheaper* than E1.
+//!
+//! Or in other words, this only matters if there is a rewrite rule
+//! that rewrites into a more expensive direction. This is unlikely
+//! for the sorts of rewrite rules we plan to write; it may matter
+//! more if many possible equalities are expressed, such as
+//! associativity, commutativity, etc.
+//!
+//! Note that the above represents the best of our understanding, but
+//! there may be cases we have missed; a more complete examination of
+//! this question would involve building a full equality saturation
+//! loop on top of the (a)egraph in this crate, and testing with many
+//! benchmarks to see if it makes any difference.
+//!
+//! ## Rewrite Rules (FLAX: Fast Localized Aegraph eXpansion)
+//!
+//! The most common use of an e-graph or aegraph is to serve as the IR
+//! for a compiler. In this use-case, we usually wish to transform the
+//! program using a body of rewrite rules that represent valid
+//! transformations (equivalent and hopefully simpler ways of
+//! computing results). An aegraph supports applying rules in a fairly
+//! straightforward way: whenever a new eclass entry is added to the
+//! table, we invoke a toplevel "apply all rewrite rules" entry
+//! point. This entry point creates new nodes as needed, and when
+//! done, unions the rewritten nodes with the original. We thus
+//! *immediately* expand a new value into all of its representations.
+//!
+//! This immediate expansion stands in contrast to a traditional
+//! "equality saturation" e-graph system, in which it is usually best
+//! to apply rules in batches and then fix up the
+//! canonicalization. This approach was introduced in the `egg`
+//! e-graph engine [^1]. We call our system FLAX (because flax is an
+//! alternative to egg): Fast Localized Aegraph eXpansion.
+//!
+//! The reason that this is possible in an aegraph but not
+//! (efficiently, at least) in a traditional e-graph is that the data
+//! structure nodes are immutable once created: an eclass id will
+//! always refer to a fixed set of enodes. There is no
+//! recanonicalizing of eclass arguments as they union; but also this
+//! is not usually necessary, because args will have already been
+//! processed and eagerly rewritten as well. In other words, eager
+//! rewriting and the immutable data structure mutually allow each
+//! other to be practical; both work together.
+//!
+//! [^1]: M Willsey, C Nandi, Y R Wang, O Flatt, Z Tatlock, P
+//!       Panchekha. "egg: Fast and Flexible Equality Saturation." In
+//!       POPL 2021. <https://dl.acm.org/doi/10.1145/3434304>
+
+use cranelift_entity::PrimaryMap;
+use cranelift_entity::{entity_impl, packed_option::ReservedValue};
+use smallvec::{smallvec, SmallVec};
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::marker::PhantomData;
+
+mod bumpvec;
+mod ctxhash;
+mod unionfind;
+
+pub use bumpvec::{BumpArena, BumpSlice, BumpVec};
+pub use ctxhash::{CtxEq, CtxHash, CtxHashMap, Entry};
+pub use unionfind::UnionFind;
+
+/// An eclass ID.
+///
+/// A newtype over `u32` that `entity_impl!` turns into a
+/// `cranelift_entity` entity reference, so it can be used as the key
+/// of `PrimaryMap` / `SecondaryMap` tables (the eclass table and the
+/// union-find parent table in this crate are keyed by it).
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Id(u32);
+entity_impl!(Id, "eclass");
+
+impl Id {
+    /// The sentinel "no eclass" id: the entity type's reserved value.
+    pub fn invalid() -> Id {
+        <Id as ReservedValue>::reserved_value()
+    }
+}
+impl Default for Id {
+    /// The default `Id` is the invalid sentinel, not eclass 0.
+    fn default() -> Id {
+        Id::invalid()
+    }
+}
+
+/// A trait implemented by all "languages": context types that know
+/// how to compare, hash and inspect the enodes (`Self::Node`) stored
+/// in an `EGraph`.
+///
+/// The `CtxEq` / `CtxHash` supertraits supply the equality and hash
+/// used when a node is looked up in the egraph's deduplication map.
+pub trait Language: CtxEq<Self::Node, Self::Node> + CtxHash<Self::Node> {
+    /// The enode type stored in the egraph's node arena.
+    type Node: Debug;
+    /// Returns the eclass ids that `node` refers to as its children
+    /// (arguments).
+    fn children<'a>(&'a self, node: &'a Self::Node) -> &'a [Id];
+    /// Mutable counterpart of `children`: returns the eclass ids that
+    /// `node` refers to so that they can be rewritten in place.
+    fn children_mut<'a>(&'a mut self, node: &'a mut Self::Node) -> &'a mut [Id];
+    /// Whether `node` should go through the deduplication map when
+    /// added with `EGraph::add`. When this returns `false`, `add`
+    /// always creates a fresh eclass for the node.
+    fn needs_dedup(&self, node: &Self::Node) -> bool;
+}
+
+/// Conditionally-compiled trace-log macro. (Borrowed from
+/// `cranelift-codegen`; it's not worth factoring out a common
+/// subcrate for this.)
+///
+/// Forwards its arguments unchanged to `log::trace!`, but only inside
+/// an `if cfg!(feature = "trace-log")` guard. `cfg!` is a compile-time
+/// constant, so with the feature disabled the branch is statically
+/// dead, while the format arguments are still type-checked.
+///
+/// NOTE(review): the macro is `#[macro_export]`ed; if it is ever
+/// invoked from another crate, `cfg!` and `::log` would presumably
+/// resolve against that crate's features and dependencies -- confirm
+/// before relying on it outside this crate.
+#[macro_export]
+macro_rules! trace {
+    ($($tt:tt)*) => {
+        if cfg!(feature = "trace-log") {
+            ::log::trace!($($tt)*);
+        }
+    };
+}
+
+/// An egraph.
+///
+/// Bundles the enode arena, the hash-consing map used to deduplicate
+/// nodes, the eclass table, and the union-find that assigns each
+/// eclass a canonical id. The language context `L` is not stored
+/// here; it is passed in by the caller where needed (see `add`).
+pub struct EGraph<L: Language> {
+    /// Node-allocation arena.
+    pub nodes: Vec<L::Node>,
+    /// Hash-consing map from Nodes to eclass IDs.
+    node_map: CtxHashMap<NodeKey, Id>,
+    /// Eclass definitions. Each eclass consists of an enode, and
+    /// parent pointer to the rest of the eclass.
+    pub classes: PrimaryMap<Id, EClass>,
+    /// Union-find for canonical ID generation. This lets us name an
+    /// eclass with a canonical ID that is the same for all
+    /// generations of the class.
+    pub unionfind: UnionFind,
+}
+
+/// A reference to a node.
+///
+/// Stored as a `u32` index into the `nodes` vector of the `EGraph`
+/// the key was created for; it carries no reference to that vector,
+/// so the caller must supply the matching slice when resolving it.
+#[derive(Clone, Copy, Debug)]
+pub struct NodeKey {
+    /// Index of the node in the owning egraph's `nodes` vector.
+    index: u32,
+}
+
+impl NodeKey {
+    /// Build a key from an index into an egraph's `nodes` vector.
+    ///
+    /// Panics if `node_idx` does not fit in a `u32`.
+    fn from_node_idx(node_idx: usize) -> NodeKey {
+        let index = u32::try_from(node_idx).unwrap();
+        NodeKey { index }
+    }
+
+    /// Resolve this key to its node, given the `nodes` slice of the
+    /// `EGraph` the key belongs to.
+    ///
+    /// Panics if the key's index is out of bounds for `nodes`.
+    pub fn node<'a, L: Language>(&self, nodes: &'a [L::Node]) -> &'a L::Node {
+        let slot = self.index as usize;
+        &nodes[slot]
+    }
+
+    /// The raw index, for packing into an `EClass`.
+    fn bits(self) -> u32 {
+        let NodeKey { index } = self;
+        index
+    }
+
+    /// Rebuild a key from a raw index previously obtained via `bits`.
+    fn from_bits(bits: u32) -> Self {
+        Self { index: bits }
+    }
+}
+
+/// Context that lets the hash-consing map compare and hash `NodeKey`s
+/// by the nodes they refer to: it pairs the node arena with the
+/// language context that knows how to compare and hash those nodes.
+struct NodeKeyCtx<'a, L: Language> {
+    /// The node arena that `NodeKey` indices are resolved against.
+    nodes: &'a [L::Node],
+    /// The language context providing node equality and hashing.
+    node_ctx: &'a L,
+}
+
+impl<'ctx, L: Language> CtxEq<NodeKey, NodeKey> for NodeKeyCtx<'ctx, L> {
+    /// Two keys are equal when the language context considers the
+    /// nodes they point at equal.
+    fn ctx_eq(&self, a: &NodeKey, b: &NodeKey, uf: &mut UnionFind) -> bool {
+        let (lhs, rhs) = (a.node::<L>(self.nodes), b.node::<L>(self.nodes));
+        self.node_ctx.ctx_eq(lhs, rhs, uf)
+    }
+}
+
+impl<'ctx, L: Language> CtxHash<NodeKey> for NodeKeyCtx<'ctx, L> {
+    /// A key hashes as the node it points at, using the language
+    /// context's hash.
+    fn ctx_hash(&self, value: &NodeKey, uf: &mut UnionFind) -> u64 {
+        let node = value.node::<L>(self.nodes);
+        self.node_ctx.ctx_hash(node, uf)
+    }
+}
+
+/// An EClass entry. Contains one of: a single lone enode; a single
+/// new enode and a parent eclass (i.e., adds one new enode to that
+/// parent); or a union of two parent eclasses.
+#[derive(Debug, Clone, Copy)]
+pub struct EClass {
+    // Packed into 64 bits as a 2-bit tag (bits 62..64) followed by two
+    // 31-bit fields (bits 31..62 and bits 0..31). Formats:
+    //
+    // 00 | unused          (31 bits) | NodeKey         (31 bits)
+    // 01 | eclass_parent   (31 bits) | NodeKey         (31 bits)
+    // 10 | eclass_parent_1 (31 bits) | eclass_parent_2 (31 bits)
+    bits: u64,
+}
+
+impl EClass {
+    /// Width in bits of each of the two payload fields.
+    const FIELD_BITS: u32 = 31;
+    /// Mask selecting one (unshifted) payload field.
+    const FIELD_MASK: u64 = (1 << Self::FIELD_BITS) - 1;
+    /// Bit position of the two-bit format tag.
+    const TAG_SHIFT: u32 = 62;
+    /// Tag of the "lone enode" format.
+    const TAG_NODE: u64 = 0b00;
+    /// Tag of the "enode plus parent eclass" format.
+    const TAG_NODE_AND_PARENT: u64 = 0b01;
+    /// Tag of the "union of two parent eclasses" format.
+    const TAG_UNION: u64 = 0b10;
+
+    /// The two-bit format tag.
+    fn tag(&self) -> u64 {
+        self.bits >> Self::TAG_SHIFT
+    }
+
+    /// The low payload field (bits 0..31): a node key, or the second
+    /// parent of a union.
+    fn low_field(&self) -> u32 {
+        (self.bits & Self::FIELD_MASK) as u32
+    }
+
+    /// The high payload field (bits 31..62): the (first) parent eclass.
+    fn high_field(&self) -> u32 {
+        ((self.bits >> Self::FIELD_BITS) & Self::FIELD_MASK) as u32
+    }
+
+    /// Build a lone-enode entry. The node index must fit in 31 bits
+    /// (checked in debug builds only).
+    fn node(node: NodeKey) -> EClass {
+        let node_idx = u64::from(node.bits());
+        debug_assert!(node_idx <= Self::FIELD_MASK);
+        EClass {
+            bits: (Self::TAG_NODE << Self::TAG_SHIFT) | node_idx,
+        }
+    }
+
+    /// Build an entry that adds `node` on top of the existing eclass
+    /// `eclass_parent`. Both values must fit in 31 bits and the parent
+    /// must not be `Id::invalid()` (checked in debug builds only).
+    fn node_and_parent(node: NodeKey, eclass_parent: Id) -> EClass {
+        let node_idx = u64::from(node.bits());
+        debug_assert!(node_idx <= Self::FIELD_MASK);
+        debug_assert!(eclass_parent != Id::invalid());
+        let parent = u64::from(eclass_parent.0);
+        debug_assert!(parent <= Self::FIELD_MASK);
+        EClass {
+            bits: (Self::TAG_NODE_AND_PARENT << Self::TAG_SHIFT)
+                | (parent << Self::FIELD_BITS)
+                | node_idx,
+        }
+    }
+
+    /// Build an entry that unions the eclasses `parent1` and
+    /// `parent2`. Both must be valid ids that fit in 31 bits (checked
+    /// in debug builds only).
+    fn union(parent1: Id, parent2: Id) -> EClass {
+        debug_assert!(parent1 != Id::invalid());
+        let parent1 = u64::from(parent1.0);
+        debug_assert!(parent1 <= Self::FIELD_MASK);
+
+        debug_assert!(parent2 != Id::invalid());
+        let parent2 = u64::from(parent2.0);
+        debug_assert!(parent2 <= Self::FIELD_MASK);
+
+        EClass {
+            bits: (Self::TAG_UNION << Self::TAG_SHIFT) | (parent1 << Self::FIELD_BITS) | parent2,
+        }
+    }
+
+    /// Get the node, if any, from a node-only or node-and-parent
+    /// eclass.
+    pub fn get_node(&self) -> Option<NodeKey> {
+        self.as_node()
+            .or_else(|| self.as_node_and_parent().map(|(node, _)| node))
+    }
+
+    /// Get the first parent, if any.
+    pub fn parent1(&self) -> Option<Id> {
+        // Lazily fall back to the union format only when this is not a
+        // node-and-parent entry.
+        self.as_node_and_parent()
+            .map(|(_, p1)| p1)
+            .or_else(|| self.as_union().map(|(p1, _)| p1))
+    }
+
+    /// Get the second parent, if any.
+    pub fn parent2(&self) -> Option<Id> {
+        self.as_union().map(|(_, p2)| p2)
+    }
+
+    /// If this EClass is just a lone enode, return it.
+    pub fn as_node(&self) -> Option<NodeKey> {
+        if self.tag() == Self::TAG_NODE {
+            Some(NodeKey::from_bits(self.low_field()))
+        } else {
+            None
+        }
+    }
+
+    /// If this EClass is one new enode and a parent, return the node
+    /// and parent ID.
+    pub fn as_node_and_parent(&self) -> Option<(NodeKey, Id)> {
+        if self.tag() == Self::TAG_NODE_AND_PARENT {
+            let node = NodeKey::from_bits(self.low_field());
+            let parent = Id::from_bits(self.high_field());
+            Some((node, parent))
+        } else {
+            None
+        }
+    }
+
+    /// If this EClass is the union variety, return the two parent
+    /// EClasses. Both are guaranteed not to be `Id::invalid()`.
+    pub fn as_union(&self) -> Option<(Id, Id)> {
+        if self.tag() == Self::TAG_UNION {
+            let parent1 = Id::from_bits(self.high_field());
+            let parent2 = Id::from_bits(self.low_field());
+            Some((parent1, parent2))
+        } else {
+            None
+        }
+    }
+}
+
+/// A new or existing `T` when adding to a deduplicated set or data
+/// structure, like an egraph.
+#[derive(Clone, Copy, Debug)]
+pub enum NewOrExisting<T> {
+    /// The value was freshly created by the add operation.
+    New(T),
+    /// An equivalent value was already present and is returned instead.
+    Existing(T),
+}
+
+impl<T> NewOrExisting<T> {
+    /// Unwrap the carried value, discarding whether it was new or
+    /// already existing.
+    pub fn get(self) -> T {
+        match self {
+            NewOrExisting::New(inner) | NewOrExisting::Existing(inner) => inner,
+        }
+    }
+}
+
+impl<L: Language> EGraph<L>
+where
+    L::Node: 'static,
+{
+    /// Create a new aegraph.
+    pub fn new() -> Self {
+        Self {
+            nodes: vec![],
+            node_map: CtxHashMap::new(),
+            classes: PrimaryMap::new(),
+            unionfind: UnionFind::new(),
+        }
+    }
+
+    /// Create a new aegraph with the given capacity.
+    ///
+    /// The same `nodes` count is used to pre-size the node arena, the
+    /// deduplication map, the eclass table and the union-find.
+    pub fn with_capacity(nodes: usize) -> Self {
+        Self {
+            nodes: Vec::with_capacity(nodes),
+            node_map: CtxHashMap::with_capacity(nodes),
+            classes: PrimaryMap::with_capacity(nodes),
+            unionfind: UnionFind::with_capacity(nodes),
+        }
+    }
+
+    /// Add a new node.
+    ///
+    /// If `node_ctx.needs_dedup(&node)` is true and an equal node is
+    /// already interned, returns `NewOrExisting::Existing` with that
+    /// node's eclass id and discards `node`. Otherwise a new
+    /// single-node eclass is created and returned as
+    /// `NewOrExisting::New`. Nodes that do not need dedup always get
+    /// a new eclass and are not recorded in the interning map.
+    pub fn add(&mut self, node: L::Node, node_ctx: &L) -> NewOrExisting<Id> {
+        // Push the node. We can then build a NodeKey that refers to
+        // it and look for an existing interned copy. If one exists,
+        // we can pop the pushed node and return the existing Id.
+        let node_idx = self.nodes.len();
+        trace!("adding node: {:?}", node);
+        // Must be queried before `node` is moved into the arena.
+        let needs_dedup = node_ctx.needs_dedup(&node);
+        self.nodes.push(node);
+
+        let key = NodeKey::from_node_idx(node_idx);
+        if needs_dedup {
+            let ctx = NodeKeyCtx {
+                nodes: &self.nodes[..],
+                node_ctx,
+            };
+
+            match self.node_map.entry(key, &ctx, &mut self.unionfind) {
+                Entry::Occupied(o) => {
+                    let eclass_id = *o.get();
+                    // Drop the speculative copy pushed above.
+                    self.nodes.pop();
+                    trace!(" -> existing id {}", eclass_id);
+                    NewOrExisting::Existing(eclass_id)
+                }
+                Entry::Vacant(v) => {
+                    // We're creating a new eclass now.
+                    let eclass_id = self.classes.push(EClass::node(key));
+                    trace!(" -> new node and eclass: {}", eclass_id);
+                    self.unionfind.add(eclass_id);
+
+                    // Add to interning map with a NodeKey referring to the eclass.
+                    v.insert(eclass_id);
+
+                    NewOrExisting::New(eclass_id)
+                }
+            }
+        } else {
+            let eclass_id = self.classes.push(EClass::node(key));
+            self.unionfind.add(eclass_id);
+            NewOrExisting::New(eclass_id)
+        }
+    }
+
+    /// Merge one eclass into another, maintaining the acyclic
+    /// property (args must have lower eclass Ids than the eclass
+    /// containing the node with those args). Returns the Id of the
+    /// merged eclass.
+    ///
+    /// If `a == b` this is a no-op returning that id. If the
+    /// higher-numbered id is a lone-node eclass, its entry is
+    /// rewritten in place to link to the lower id and the higher id
+    /// is returned; otherwise a new union entry is appended and its
+    /// id is returned.
+    ///
+    /// Panics if either id is `Id::invalid()`.
+    pub fn union(&mut self, a: Id, b: Id) -> Id {
+        assert_ne!(a, Id::invalid());
+        assert_ne!(b, Id::invalid());
+        // From here on `a` is the younger (higher) id and `b` the
+        // older (lower) one.
+        let (a, b) = (std::cmp::max(a, b), std::cmp::min(a, b));
+        trace!("union: id {} and id {}", a, b);
+        if a == b {
+            trace!(" -> no-op");
+            return a;
+        }
+
+        self.unionfind.union(a, b);
+
+        // If the younger eclass has no parent, we can link it
+        // directly and return that eclass. Otherwise, we create a new
+        // union eclass.
+        if let Some(node) = self.classes[a].as_node() {
+            trace!(
+                " -> id {} is one-node eclass; making into node-and-parent with id {}",
+                a,
+                b
+            );
+            self.classes[a] = EClass::node_and_parent(node, b);
+            return a;
+        }
+
+        let u = self.classes.push(EClass::union(a, b));
+        // Register the new id and put it in the same union-find class
+        // as the two eclasses it merges.
+        self.unionfind.add(u);
+        self.unionfind.union(u, b);
+        trace!(" -> union id {} and id {} into id {}", a, b, u);
+        u
+    }
+
+    /// Get the canonical ID for an eclass. This may be an older
+    /// generation, so will not be able to see all enodes in the
+    /// eclass; but it will allow us to unambiguously refer to an
+    /// eclass, even across merging.
+    ///
+    /// This variant updates the union-find while searching (see
+    /// `UnionFind::find_and_update`), hence `&mut self`.
+    pub fn canonical_id_mut(&mut self, eclass: Id) -> Id {
+        self.unionfind.find_and_update(eclass)
+    }
+
+    /// Get the canonical ID for an eclass. This may be an older
+    /// generation, so will not be able to see all enodes in the
+    /// eclass; but it will allow us to unambiguously refer to an
+    /// eclass, even across merging.
+    ///
+    /// This variant does not modify the union-find.
+    pub fn canonical_id(&self, eclass: Id) -> Id {
+        self.unionfind.find(eclass)
+    }
+
+    /// Get the enodes for a given eclass.
+    ///
+    /// The returned `NodeIter` does not borrow the egraph; pass the
+    /// egraph to each `NodeIter::next` call.
+    pub fn enodes(&self, eclass: Id) -> NodeIter<L> {
+        NodeIter {
+            stack: smallvec![eclass],
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// An iterator over all nodes in an eclass.
+///
+/// Because eclasses are immutable once created, this does *not* need
+/// to hold an open borrow on the egraph; it is free to add new nodes,
+/// while our existing Ids will remain valid.
+///
+/// This is not a `std::iter::Iterator`: the egraph is passed to each
+/// call of `next` instead of being stored here.
+pub struct NodeIter<L: Language> {
+    /// Eclass ids still to be visited; the next one is popped from the end.
+    stack: SmallVec<[Id; 8]>,
+    /// Ties the iterator to the language type without storing a value of it.
+    _phantom: PhantomData<L>,
+}
+
+impl<L: Language> NodeIter<L> {
+    /// Return the next enode of the eclass, or `None` once every
+    /// reachable entry has been visited.
+    ///
+    /// `egraph` must be the egraph this iterator was created from.
+    pub fn next<'a>(&mut self, egraph: &'a EGraph<L>) -> Option<&'a L::Node> {
+        loop {
+            // An empty work stack means the whole eclass has been walked.
+            let id = self.stack.pop()?;
+            let entry = egraph.classes[id];
+
+            // Union entries carry no node: queue both sides (first
+            // parent on top, so it is visited first) and keep looking.
+            if let Some((first, second)) = entry.as_union() {
+                debug_assert!(first != Id::invalid());
+                debug_assert!(second != Id::invalid());
+                self.stack.push(second);
+                self.stack.push(first);
+                continue;
+            }
+
+            // Node-and-parent: yield the node now, visit the parent later.
+            if let Some((key, parent)) = entry.as_node_and_parent() {
+                if parent != Id::invalid() {
+                    self.stack.push(parent);
+                }
+                return Some(&egraph.nodes[key.index as usize]);
+            }
+
+            // The only remaining valid format is a lone node.
+            match entry.as_node() {
+                Some(key) => return Some(&egraph.nodes[key.index as usize]),
+                None => unreachable!("Invalid eclass format"),
+            }
+        }
+    }
+}
--- a/cranelift/egraph/src/unionfind.rs
+++ b/cranelift/egraph/src/unionfind.rs
@@ -0,0 +1,70 @@
+//! Simple union-find data structure.
+
+use crate::{trace, Id};
+use cranelift_entity::SecondaryMap;
+
+/// A union-find data structure. The data structure can allocate
+/// `Id`s, indicating eclasses, and can merge eclasses together.
+#[derive(Clone, Debug)]
+pub struct UnionFind {
+    /// Parent link of each `Id`; an `Id` that is its own parent is the
+    /// root (canonical id) of its equivalence class.
+    parent: SecondaryMap<Id, Id>,
+}
+
+impl UnionFind {
+    /// Create a new `UnionFind`.
+    pub fn new() -> Self {
+        UnionFind {
+            parent: SecondaryMap::new(),
+        }
+    }
+
+    /// Create a new `UnionFind` with the given capacity.
+    pub fn with_capacity(cap: usize) -> Self {
+        UnionFind {
+            parent: SecondaryMap::with_capacity(cap),
+        }
+    }
+
+    /// Add an `Id` to the `UnionFind`, with its own equivalence class
+    /// initially. All `Id`s must be added before being queried or
+    /// unioned.
+    pub fn add(&mut self, id: Id) {
+        // A root is represented as a node that is its own parent.
+        self.parent[id] = id;
+    }
+
+    /// Find the canonical `Id` of a given `Id`.
+    ///
+    /// Follows parent links until reaching an `Id` that is its own
+    /// parent; does not modify the data structure.
+    pub fn find(&self, mut node: Id) -> Id {
+        while node != self.parent[node] {
+            node = self.parent[node];
+        }
+        node
+    }
+
+    /// Find the canonical `Id` of a given `Id`, updating the data
+    /// structure in the process so that future queries for this `Id`
+    /// (and others in its chain up to the root of the equivalence
+    /// class) will be faster.
+    pub fn find_and_update(&mut self, mut node: Id) -> Id {
+        // "Path halving" mutating find (Tarjan and Van Leeuwen): each
+        // visited node is re-pointed at its grandparent, and the walk
+        // then continues from that grandparent, so every other node on
+        // the path is shortcut.
+        let orig = node;
+        while node != self.parent[node] {
+            let next = self.parent[self.parent[node]];
+            self.parent[node] = next;
+            node = next;
+        }
+        trace!("find_and_update: {} -> {}", orig, node);
+        node
+    }
+
+    /// Merge the equivalence classes of the two `Id`s.
+    ///
+    /// The root with the higher `Id` is linked under the root with the
+    /// lower `Id`, so the canonical id of a class is the lowest root
+    /// ever merged into it. A no-op if both are already in one class.
+    pub fn union(&mut self, a: Id, b: Id) {
+        let a = self.find_and_update(a);
+        let b = self.find_and_update(b);
+        let (a, b) = (std::cmp::min(a, b), std::cmp::max(a, b));
+        if a != b {
+            // Always canonicalize toward lower IDs.
+            self.parent[b] = a;
+            trace!("union: {}, {}", a, b);
+        }
+    }
+}