diff --git a/Cargo.lock b/Cargo.lock
index ac6545cd1f..a91b1554dd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -554,7 +554,6 @@ dependencies = [
  "cranelift-bforest",
  "cranelift-codegen-meta",
  "cranelift-codegen-shared",
- "cranelift-egraph",
  "cranelift-entity",
  "cranelift-isle",
  "criterion",
@@ -580,18 +579,6 @@ dependencies = [
 name = "cranelift-codegen-shared"
 version = "0.92.0"
 
-[[package]]
-name = "cranelift-egraph"
-version = "0.92.0"
-dependencies = [
- "cranelift-entity",
- "fxhash",
- "hashbrown",
- "indexmap",
- "log",
- "smallvec",
-]
-
 [[package]]
 name = "cranelift-entity"
 version = "0.92.0"
diff --git a/Cargo.toml b/Cargo.toml
index 7f9cf233a7..e70b034024 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -78,7 +78,6 @@ opt-level = 0
 resolver = '2'
 members = [
   "cranelift",
-  "cranelift/egraph",
   "cranelift/isle/fuzz",
   "cranelift/isle/islec",
   "cranelift/serde",
@@ -137,7 +136,6 @@ wasmtime-wit-bindgen = { path = "crates/wit-bindgen", version = "=5.0.0" }
 
 cranelift-wasm = { path = "cranelift/wasm", version = "0.92.0" }
 cranelift-codegen = { path = "cranelift/codegen", version = "0.92.0" }
-cranelift-egraph = { path = "cranelift/egraph", version = "0.92.0" }
 cranelift-frontend = { path = "cranelift/frontend", version = "0.92.0" }
 cranelift-entity = { path = "cranelift/entity", version = "0.92.0" }
 cranelift-native = { path = "cranelift/native", version = "0.92.0" }
diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml
index 65818b9074..580af31d22 100644
--- a/cranelift/codegen/Cargo.toml
+++ b/cranelift/codegen/Cargo.toml
@@ -18,8 +18,7 @@ bumpalo = "3"
 cranelift-codegen-shared = { path = "./shared", version = "0.92.0" }
 cranelift-entity = { workspace = true }
 cranelift-bforest = { workspace = true }
-cranelift-egraph = { workspace = true }
-hashbrown = { workspace = true, optional = true }
+hashbrown = { workspace = true }
 target-lexicon = { workspace = true }
 log = { workspace = true }
 serde = { version = "1.0.94", features = ["derive"], optional = true }
@@ -42,16 +41,18 @@ cranelift-codegen-meta = { path = "meta", version = "0.92.0" }
 cranelift-isle = { path = "../isle/isle", version = "=0.92.0" }
 
 [features]
-default = ["std", "unwind"]
+default = ["std", "unwind", "trace-log"]
 
 # The "std" feature enables use of libstd. The "core" feature enables use
 # of some minimal std-like replacement libraries. At least one of these two
 # features need to be enabled.
 std = []
 
-# The "core" features enables use of "hashbrown" since core doesn't have
-# a HashMap implementation, and a workaround for Cargo #4866.
-core = ["hashbrown"]
+# The "core" feature used to enable a hashmap workaround, but is now
+# deprecated: hashbrown is always used, and a no_std build is no
+# longer supported. The feature remains as a no-op for backward
+# compatibility.
+core = []
 
 # This enables some additional functions useful for writing tests, but which
 # can significantly increase the size of the library.
diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs
index e0adf5827e..5e9f0abfef 100644
--- a/cranelift/codegen/meta/src/gen_inst.rs
+++ b/cranelift/codegen/meta/src/gen_inst.rs
@@ -60,51 +60,36 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) {
     fmt.empty_line();
 }
 
-/// Generate the InstructionData and InstructionImms enums.
+/// Generate the InstructionData enum.
 ///
 /// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at
 /// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
 /// `ValueList` to store the additional information out of line.
-///
-/// `InstructionImms` stores everything about an instruction except for the arguments: in other
-/// words, the `Opcode` and any immediates or other parameters. `InstructionData` stores this, plus
-/// the SSA `Value` arguments.
 fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
-    for (name, include_args) in &[("InstructionData", true), ("InstructionImms", false)] {
-        fmt.line("#[derive(Copy, Clone, Debug, PartialEq, Hash)]");
-        if !include_args {
-            // `InstructionImms` gets some extra derives: it acts like a sort of
-            // extended opcode and we want to allow for hashconsing via `Eq`.
-            fmt.line("#[derive(Eq)]");
+    fmt.line("#[derive(Copy, Clone, Debug, PartialEq, Hash)]");
+    fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
+    fmt.line("#[allow(missing_docs)]");
+    fmtln!(fmt, "pub enum InstructionData {");
+    fmt.indent(|fmt| {
+        for format in formats {
+            fmtln!(fmt, "{} {{", format.name);
+            fmt.indent(|fmt| {
+                fmt.line("opcode: Opcode,");
+                if format.has_value_list {
+                    fmt.line("args: ValueList,");
+                } else if format.num_value_operands == 1 {
+                    fmt.line("arg: Value,");
+                } else if format.num_value_operands > 0 {
+                    fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
+                }
+                for field in &format.imm_fields {
+                    fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
+                }
+            });
+            fmtln!(fmt, "},");
         }
-        fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
-        fmt.line("#[allow(missing_docs)]");
-        // Generate `enum InstructionData` or `enum InstructionImms`. (This
-        // comment exists so one can grep for `enum InstructionData`!)
-        fmtln!(fmt, "pub enum {} {{", name);
-        fmt.indent(|fmt| {
-            for format in formats {
-                fmtln!(fmt, "{} {{", format.name);
-                fmt.indent(|fmt| {
-                    fmt.line("opcode: Opcode,");
-                    if *include_args {
-                        if format.has_value_list {
-                            fmt.line("args: ValueList,");
-                        } else if format.num_value_operands == 1 {
-                            fmt.line("arg: Value,");
-                        } else if format.num_value_operands > 0 {
-                            fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
-                        }
-                    }
-                    for field in &format.imm_fields {
-                        fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
-                    }
-                });
-                fmtln!(fmt, "},");
-            }
-        });
-        fmt.line("}");
-    }
+    });
+    fmt.line("}");
 }
 
 fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) {
@@ -165,122 +150,6 @@ fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_
     fmtln!(fmt, "}");
 }
 
-/// Generate the conversion from `InstructionData` to `InstructionImms`, stripping out the
-/// `Value`s.
-fn gen_instruction_data_to_instruction_imms(formats: &[&InstructionFormat], fmt: &mut Formatter) {
-    fmt.line("impl std::convert::From<&InstructionData> for InstructionImms {");
-    fmt.indent(|fmt| {
-        fmt.doc_comment("Convert an `InstructionData` into an `InstructionImms`.");
-        fmt.line("fn from(data: &InstructionData) -> InstructionImms {");
-        fmt.indent(|fmt| {
-            fmt.line("match data {");
-            fmt.indent(|fmt| {
-                for format in formats {
-                    fmtln!(fmt, "InstructionData::{} {{", format.name);
-                    fmt.indent(|fmt| {
-                        fmt.line("opcode,");
-                        for field in &format.imm_fields {
-                            fmtln!(fmt, "{},", field.member);
-                        }
-                        fmt.line("..");
-                    });
-                    fmtln!(fmt, "}} => InstructionImms::{} {{", format.name);
-                    fmt.indent(|fmt| {
-                        fmt.line("opcode: *opcode,");
-                        for field in &format.imm_fields {
-                            fmtln!(fmt, "{}: {}.clone(),", field.member, field.member);
-                        }
-                    });
-                    fmt.line("},");
-                }
-            });
-            fmt.line("}");
-        });
-        fmt.line("}");
-    });
-    fmt.line("}");
-    fmt.empty_line();
-}
-
-/// Generate the conversion from `InstructionImms` to `InstructionData`, adding the
-/// `Value`s.
-fn gen_instruction_imms_to_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
-    fmt.line("impl  InstructionImms {");
-    fmt.indent(|fmt| {
-        fmt.doc_comment("Convert an `InstructionImms` into an `InstructionData` by adding args.");
-        fmt.line(
-            "pub fn with_args(&self, values: &[Value], value_list: &mut ValueListPool) -> InstructionData {",
-        );
-        fmt.indent(|fmt| {
-            fmt.line("match self {");
-            fmt.indent(|fmt| {
-                for format in formats {
-                    fmtln!(fmt, "InstructionImms::{} {{", format.name);
-                    fmt.indent(|fmt| {
-                        fmt.line("opcode,");
-                        for field in &format.imm_fields {
-                            fmtln!(fmt, "{},", field.member);
-                        }
-                    });
-                    fmt.line("} => {");
-                    if format.has_value_list {
-                        fmtln!(fmt, "let args = ValueList::from_slice(values, value_list);");
-                    }
-                    fmt.indent(|fmt| {
-                        fmtln!(fmt, "InstructionData::{} {{", format.name);
-                        fmt.indent(|fmt| {
-                            fmt.line("opcode: *opcode,");
-                            for field in &format.imm_fields {
-                                fmtln!(fmt, "{}: {}.clone(),", field.member, field.member);
-                            }
-                            if format.has_value_list {
-                                fmtln!(fmt, "args,");
-                            } else if format.num_value_operands == 1 {
-                                fmtln!(fmt, "arg: values[0],");
-                            } else if format.num_value_operands > 0 {
-                                let mut args = vec![];
-                                for i in 0..format.num_value_operands {
-                                    args.push(format!("values[{}]", i));
-                                }
-                                fmtln!(fmt, "args: [{}],", args.join(", "));
-                            }
-                        });
-                        fmt.line("}");
-                    });
-                    fmt.line("},");
-                }
-            });
-            fmt.line("}");
-        });
-        fmt.line("}");
-    });
-    fmt.line("}");
-    fmt.empty_line();
-}
-
-/// Generate the `opcode` method on InstructionImms.
-fn gen_instruction_imms_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) {
-    fmt.line("impl InstructionImms {");
-    fmt.indent(|fmt| {
-        fmt.doc_comment("Get the opcode of this instruction.");
-        fmt.line("pub fn opcode(&self) -> Opcode {");
-        fmt.indent(|fmt| {
-            let mut m = Match::new("*self");
-            for format in formats {
-                m.arm(
-                    format!("Self::{}", format.name),
-                    vec!["opcode", ".."],
-                    "opcode".to_string(),
-                );
-            }
-            fmt.add_match(m);
-        });
-        fmt.line("}");
-    });
-    fmt.line("}");
-    fmt.empty_line();
-}
-
 /// Generate the boring parts of the InstructionData implementation.
 ///
 /// These methods in `impl InstructionData` can be generated automatically from the instruction
@@ -401,8 +270,12 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter
 
             This operation requires a reference to a `ValueListPool` to
             determine if the contents of any `ValueLists` are equal.
+
+            This operation takes a closure that is allowed to map each
+            argument value to some other value before the instructions
+            are compared. This allows various forms of canonicalization.
         "#);
-        fmt.line("pub fn eq(&self, other: &Self, pool: &ir::ValueListPool) -> bool {");
+        fmt.line("pub fn eq<F: Fn(Value) -> Value>(&self, other: &Self, pool: &ir::ValueListPool, mapper: F) -> bool {");
         fmt.indent(|fmt| {
             fmt.line("if ::core::mem::discriminant(self) != ::core::mem::discriminant(other) {");
             fmt.indent(|fmt| {
@@ -418,13 +291,13 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter
 
                     let args_eq = if format.has_value_list {
                         members.push("args");
-                        Some("args1.as_slice(pool) == args2.as_slice(pool)")
+                        Some("args1.as_slice(pool).len() == args2.as_slice(pool).len() && args1.as_slice(pool).iter().zip(args2.as_slice(pool).iter()).all(|(a, b)| mapper(*a) == mapper(*b))")
                     } else if format.num_value_operands == 1 {
                         members.push("arg");
-                        Some("arg1 == arg2")
+                        Some("mapper(*arg1) == mapper(*arg2)")
                     } else if format.num_value_operands > 0 {
                         members.push("args");
-                        Some("args1 == args2")
+                        Some("args1.iter().zip(args2.iter()).all(|(a, b)| mapper(*a) == mapper(*b))")
                     } else {
                         None
                     };
@@ -459,8 +332,12 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter
 
             This operation requires a reference to a `ValueListPool` to
             hash the contents of any `ValueLists`.
+
+            This operation takes a closure that is allowed to map each
+            argument value to some other value before it is hashed. This
+            allows various forms of canonicalization.
         "#);
-        fmt.line("pub fn hash<H: ::core::hash::Hasher>(&self, state: &mut H, pool: &ir::ValueListPool) {");
+        fmt.line("pub fn hash<H: ::core::hash::Hasher, F: Fn(Value) -> Value>(&self, state: &mut H, pool: &ir::ValueListPool, mapper: F) {");
         fmt.indent(|fmt| {
             fmt.line("match *self {");
             fmt.indent(|fmt| {
@@ -468,17 +345,17 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter
                     let name = format!("Self::{}", format.name);
                     let mut members = vec!["opcode"];
 
-                    let args = if format.has_value_list {
+                    let (args, len) = if format.has_value_list {
                         members.push("ref args");
-                        "args.as_slice(pool)"
+                        ("args.as_slice(pool)", "args.len(pool)")
                     } else if format.num_value_operands == 1 {
                         members.push("ref arg");
-                        "arg"
-                    } else if format.num_value_operands > 0{
+                        ("::core::slice::from_ref(arg)", "1")
+                    } else if format.num_value_operands > 0 {
                         members.push("ref args");
-                        "args"
+                        ("args", "args.len()")
                     } else {
-                        "&()"
+                        ("&[]", "0")
                     };
 
                     for field in &format.imm_fields {
@@ -493,7 +370,13 @@ fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter
                         for field in &format.imm_fields {
                             fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", field.member);
                         }
-                        fmtln!(fmt, "::core::hash::Hash::hash({}, state);", args);
+                        fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", len);
+                        fmtln!(fmt, "for &arg in {} {{", args);
+                        fmt.indent(|fmt| {
+                            fmtln!(fmt, "let arg = mapper(arg);");
+                            fmtln!(fmt, "::core::hash::Hash::hash(&arg, state);");
+                        });
+                        fmtln!(fmt, "}");
                     });
                     fmtln!(fmt, "}");
                 }
@@ -1264,46 +1147,40 @@ fn gen_common_isle(
         gen_isle_enum(name, variants, fmt)
     }
 
-    if isle_target == IsleTarget::Lower {
-        // Generate all of the value arrays we need for `InstructionData` as well as
-        // the constructors and extractors for them.
-        fmt.line(
-            ";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
+    // Generate all of the value arrays we need for `InstructionData` as well as
+    // the constructors and extractors for them.
+    fmt.line(";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
+    fmt.empty_line();
+    let value_array_arities: BTreeSet<_> = formats
+        .iter()
+        .filter(|f| f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1)
+        .map(|f| f.num_value_operands)
+        .collect();
+    for n in value_array_arities {
+        fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
+        fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
+        fmt.empty_line();
+
+        fmtln!(
+            fmt,
+            "(decl value_array_{} ({}) ValueArray{})",
+            n,
+            (0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
+            n
+        );
+        fmtln!(
+            fmt,
+            "(extern constructor value_array_{} pack_value_array_{})",
+            n,
+            n
+        );
+        fmtln!(
+            fmt,
+            "(extern extractor infallible value_array_{} unpack_value_array_{})",
+            n,
+            n
         );
         fmt.empty_line();
-        let value_array_arities: BTreeSet<_> = formats
-            .iter()
-            .filter(|f| {
-                f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1
-            })
-            .map(|f| f.num_value_operands)
-            .collect();
-        for n in value_array_arities {
-            fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
-            fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
-            fmt.empty_line();
-
-            fmtln!(
-                fmt,
-                "(decl value_array_{} ({}) ValueArray{})",
-                n,
-                (0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
-                n
-            );
-            fmtln!(
-                fmt,
-                "(extern constructor value_array_{} pack_value_array_{})",
-                n,
-                n
-            );
-            fmtln!(
-                fmt,
-                "(extern extractor infallible value_array_{} unpack_value_array_{})",
-                n,
-                n
-            );
-            fmt.empty_line();
-        }
     }
 
     // Generate the extern type declaration for `Opcode`.
@@ -1322,32 +1199,24 @@ fn gen_common_isle(
     fmt.line(")");
     fmt.empty_line();
 
-    // Generate the extern type declaration for `InstructionData`
-    // (lowering) or `InstructionImms` (opt).
-    let inst_data_name = match isle_target {
-        IsleTarget::Lower => "InstructionData",
-        IsleTarget::Opt => "InstructionImms",
-    };
+    // Generate the extern type declaration for `InstructionData`.
     fmtln!(
         fmt,
-        ";;;; `{}` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
-        inst_data_name
+        ";;;; `InstructionData` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
     );
     fmt.empty_line();
-    fmtln!(fmt, "(type {} extern", inst_data_name);
+    fmtln!(fmt, "(type InstructionData extern");
     fmt.indent(|fmt| {
         fmt.line("(enum");
         fmt.indent(|fmt| {
             for format in formats {
                 let mut s = format!("({} (opcode Opcode)", format.name);
-                if isle_target == IsleTarget::Lower {
-                    if format.has_value_list {
-                        s.push_str(" (args ValueList)");
-                    } else if format.num_value_operands == 1 {
-                        s.push_str(" (arg Value)");
-                    } else if format.num_value_operands > 1 {
-                        write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
-                    }
+                if format.has_value_list {
+                    s.push_str(" (args ValueList)");
+                } else if format.num_value_operands == 1 {
+                    s.push_str(" (arg Value)");
+                } else if format.num_value_operands > 1 {
+                    write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
                 }
                 for field in &format.imm_fields {
                     write!(
@@ -1370,13 +1239,12 @@ fn gen_common_isle(
     // Generate the helper extractors for each opcode's full instruction.
     fmtln!(
         fmt,
-        ";;;; Extracting Opcode, Operands, and Immediates from `{}` ;;;;;;;;",
-        inst_data_name
+        ";;;; Extracting Opcode, Operands, and Immediates from `InstructionData` ;;;;;;;;",
     );
     fmt.empty_line();
     let ret_ty = match isle_target {
         IsleTarget::Lower => "Inst",
-        IsleTarget::Opt => "Id",
+        IsleTarget::Opt => "Value",
     };
     for inst in instructions {
         if isle_target == IsleTarget::Opt && inst.format.has_value_list {
@@ -1395,23 +1263,10 @@ fn gen_common_isle(
                 .iter()
                 .map(|o| {
                     let ty = o.kind.rust_type;
-                    match isle_target {
-                        IsleTarget::Lower => {
-                            if ty == "&[Value]" {
-                                "ValueSlice"
-                            } else {
-                                ty.rsplit("::").next().unwrap()
-                            }
-                        }
-                        IsleTarget::Opt => {
-                            if ty == "&[Value]" {
-                                panic!("value slice in mid-end extractor");
-                            } else if ty == "Value" || ty == "ir::Value" {
-                                "Id"
-                            } else {
-                                ty.rsplit("::").next().unwrap()
-                            }
-                        }
+                    if ty == "&[Value]" {
+                        "ValueSlice"
+                    } else {
+                        ty.rsplit("::").next().unwrap()
                     }
                 })
                 .collect::<Vec<_>>()
@@ -1435,102 +1290,55 @@ fn gen_common_isle(
                     .join(" ")
             );
 
-            if isle_target == IsleTarget::Lower {
-                let mut s = format!(
-                    "(inst_data (InstructionData.{} (Opcode.{})",
-                    inst.format.name, inst.camel_name
-                );
+            let mut s = format!(
+                "(inst_data{} (InstructionData.{} (Opcode.{})",
+                match isle_target {
+                    IsleTarget::Lower => "",
+                    IsleTarget::Opt => " ty",
+                },
+                inst.format.name,
+                inst.camel_name
+            );
 
-                // Value and varargs operands.
-                if inst.format.has_value_list {
-                    // The instruction format uses a value list, but the
-                    // instruction itself might have not only a `&[Value]`
-                    // varargs operand, but also one or more `Value` operands as
-                    // well. If this is the case, then we need to read them off
-                    // the front of the `ValueList`.
-                    let values: Vec<_> = inst
-                        .operands_in
-                        .iter()
-                        .filter(|o| o.is_value())
-                        .map(|o| o.name)
-                        .collect();
-                    let varargs = inst
-                        .operands_in
-                        .iter()
-                        .find(|o| o.is_varargs())
-                        .unwrap()
-                        .name;
-                    if values.is_empty() {
-                        write!(&mut s, " (value_list_slice {})", varargs).unwrap();
-                    } else {
-                        write!(
-                            &mut s,
-                            " (unwrap_head_value_list_{} {} {})",
-                            values.len(),
-                            values.join(" "),
-                            varargs
-                        )
-                        .unwrap();
-                    }
-                } else if inst.format.num_value_operands == 1 {
+            // Value and varargs operands.
+            if inst.format.has_value_list {
+                // The instruction format uses a value list, but the
+                // instruction itself might have not only a `&[Value]`
+                // varargs operand, but also one or more `Value` operands as
+                // well. If this is the case, then we need to read them off
+                // the front of the `ValueList`.
+                let values: Vec<_> = inst
+                    .operands_in
+                    .iter()
+                    .filter(|o| o.is_value())
+                    .map(|o| o.name)
+                    .collect();
+                let varargs = inst
+                    .operands_in
+                    .iter()
+                    .find(|o| o.is_varargs())
+                    .unwrap()
+                    .name;
+                if values.is_empty() {
+                    write!(&mut s, " (value_list_slice {})", varargs).unwrap();
+                } else {
                     write!(
                         &mut s,
-                        " {}",
-                        inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
-                    )
-                    .unwrap();
-                } else if inst.format.num_value_operands > 1 {
-                    let values = inst
-                        .operands_in
-                        .iter()
-                        .filter(|o| o.is_value())
-                        .map(|o| o.name)
-                        .collect::<Vec<_>>();
-                    assert_eq!(values.len(), inst.format.num_value_operands);
-                    let values = values.join(" ");
-                    write!(
-                        &mut s,
-                        " (value_array_{} {})",
-                        inst.format.num_value_operands, values,
+                        " (unwrap_head_value_list_{} {} {})",
+                        values.len(),
+                        values.join(" "),
+                        varargs
                     )
                     .unwrap();
                 }
-
-                // Immediates.
-                let imm_operands: Vec<_> = inst
-                    .operands_in
-                    .iter()
-                    .filter(|o| !o.is_value() && !o.is_varargs())
-                    .collect();
-                assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
-                for op in imm_operands {
-                    write!(&mut s, " {}", op.name).unwrap();
-                }
-
-                s.push_str("))");
-                fmt.line(&s);
-            } else {
-                // Mid-end case.
-                let mut s = format!(
-                    "(enodes ty (InstructionImms.{} (Opcode.{})",
-                    inst.format.name, inst.camel_name
-                );
-
-                // Immediates.
-                let imm_operands: Vec<_> = inst
-                    .operands_in
-                    .iter()
-                    .filter(|o| !o.is_value() && !o.is_varargs())
-                    .collect();
-                assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
-                for op in imm_operands {
-                    write!(&mut s, " {}", op.name).unwrap();
-                }
-                // End of `InstructionImms`.
-                s.push_str(")");
-
-                // Second arg to `enode`: value args.
-                assert!(!inst.operands_in.iter().any(|op| op.is_varargs()));
+            } else if inst.format.num_value_operands == 1 {
+                write!(
+                    &mut s,
+                    " {}",
+                    inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
+                )
+                .unwrap();
+            } else if inst.format.num_value_operands > 1 {
                 let values = inst
                     .operands_in
                     .iter()
@@ -1541,14 +1349,25 @@ fn gen_common_isle(
                 let values = values.join(" ");
                 write!(
                     &mut s,
-                    " (id_array_{} {})",
+                    " (value_array_{} {})",
                     inst.format.num_value_operands, values,
                 )
                 .unwrap();
-
-                s.push_str(")");
-                fmt.line(&s);
             }
+
+            // Immediates.
+            let imm_operands: Vec<_> = inst
+                .operands_in
+                .iter()
+                .filter(|o| !o.is_value() && !o.is_varargs())
+                .collect();
+            assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
+            for op in imm_operands {
+                write!(&mut s, " {}", op.name).unwrap();
+            }
+
+            s.push_str("))");
+            fmt.line(&s);
         });
         fmt.line(")");
 
@@ -1566,10 +1385,53 @@ fn gen_common_isle(
             );
             fmt.indent(|fmt| {
                 let mut s = format!(
-                    "(pure_enode ty (InstructionImms.{} (Opcode.{})",
+                    "(make_inst ty (InstructionData.{} (Opcode.{})",
                     inst.format.name, inst.camel_name
                 );
 
+                // Handle values. Note that we skip generating
+                // constructors for any instructions with variadic
+                // value lists. This is fine for the mid-end because
+                // in practice only calls and branches (for branch
+                // args) use this functionality, and neither can
+                // really be optimized or rewritten in the mid-end
+                // (currently).
+                //
+                // As a consequence, we only have to handle the
+                // one-`Value` case, in which the `Value` is directly
+                // in the `InstructionData`, and the multiple-`Value`
+                // case, in which the `Value`s are in a
+                // statically-sized array (e.g. `[Value; 2]` for a
+                // binary op).
+                assert!(!inst.format.has_value_list);
+                if inst.format.num_value_operands == 1 {
+                    write!(
+                        &mut s,
+                        " {}",
+                        inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
+                    )
+                    .unwrap();
+                } else if inst.format.num_value_operands > 1 {
+                    // As above, get all bindings together, and pass
+                    // to a sub-term; here we use a constructor to
+                    // build the value array.
+                    let values = inst
+                        .operands_in
+                        .iter()
+                        .filter(|o| o.is_value())
+                        .map(|o| o.name)
+                        .collect::<Vec<_>>();
+                    assert_eq!(values.len(), inst.format.num_value_operands);
+                    let values = values.join(" ");
+                    write!(
+                        &mut s,
+                        " (value_array_{}_ctor {})",
+                        inst.format.num_value_operands, values
+                    )
+                    .unwrap();
+                }
+
+                // Immediates (non-value args).
                 for o in inst
                     .operands_in
                     .iter()
@@ -1577,22 +1439,7 @@ fn gen_common_isle(
                 {
                     write!(&mut s, " {}", o.name).unwrap();
                 }
-                s.push_str(")");
-
-                let values = inst
-                    .operands_in
-                    .iter()
-                    .filter(|o| o.is_value())
-                    .map(|o| o.name)
-                    .collect::<Vec<_>>();
-                let values = values.join(" ");
-                write!(
-                    &mut s,
-                    " (id_array_{} {})",
-                    inst.format.num_value_operands, values
-                )
-                .unwrap();
-                s.push_str(")");
+                s.push_str("))");
                 fmt.line(&s);
             });
             fmt.line(")");
@@ -1693,9 +1540,6 @@ pub(crate) fn generate(
     gen_instruction_data(&formats, &mut fmt);
     fmt.empty_line();
     gen_instruction_data_impl(&formats, &mut fmt);
-    gen_instruction_data_to_instruction_imms(&formats, &mut fmt);
-    gen_instruction_imms_impl(&formats, &mut fmt);
-    gen_instruction_imms_to_instruction_data(&formats, &mut fmt);
     fmt.empty_line();
     gen_opcodes(all_inst, &mut fmt);
     fmt.empty_line();
diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs
index 8a5fed38eb..8d705a8809 100644
--- a/cranelift/codegen/src/context.rs
+++ b/cranelift/codegen/src/context.rs
@@ -12,7 +12,7 @@
 use crate::alias_analysis::AliasAnalysis;
 use crate::dce::do_dce;
 use crate::dominator_tree::DominatorTree;
-use crate::egraph::FuncEGraph;
+use crate::egraph::EgraphPass;
 use crate::flowgraph::ControlFlowGraph;
 use crate::ir::Function;
 use crate::isa::TargetIsa;
@@ -26,6 +26,7 @@ use crate::result::{CodegenResult, CompileResult};
 use crate::settings::{FlagsOrIsa, OptLevel};
 use crate::simple_gvn::do_simple_gvn;
 use crate::simple_preopt::do_preopt;
+use crate::trace;
 use crate::unreachable_code::eliminate_unreachable_code;
 use crate::verifier::{verify_context, VerifierErrors, VerifierResult};
 use crate::{timing, CompileError};
@@ -191,15 +192,7 @@ impl Context {
         self.remove_constant_phis(isa)?;
 
         if isa.flags().use_egraphs() {
-            log::debug!(
-                "About to optimize with egraph phase:\n{}",
-                self.func.display()
-            );
-            self.compute_loop_analysis();
-            let mut eg = FuncEGraph::new(&self.func, &self.domtree, &self.loop_analysis, &self.cfg);
-            eg.elaborate(&mut self.func);
-            log::debug!("After egraph optimization:\n{}", self.func.display());
-            log::info!("egraph stats: {:?}", eg.stats);
+            self.egraph_pass()?;
         } else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
             self.replace_redundant_loads()?;
             self.simple_gvn(isa)?;
@@ -379,4 +372,24 @@ impl Context {
         do_souper_harvest(&self.func, out);
         Ok(())
     }
+
+    /// Run the egraph-based optimization pass over `self.func`; as written, this always returns `Ok(())`.
+    pub fn egraph_pass(&mut self) -> CodegenResult<()> {
+        trace!(
+            "About to optimize with egraph phase:\n{}",
+            self.func.display()
+        );
+        self.compute_loop_analysis(); // the pass borrows `self.loop_analysis` below
+        let mut alias_analysis = AliasAnalysis::new(&self.func, &self.domtree); // NOTE(review): assumes `self.domtree` is already computed; confirm
+        let mut pass = EgraphPass::new(
+            &mut self.func,
+            &self.domtree,
+            &self.loop_analysis,
+            &mut alias_analysis,
+        );
+        pass.run();
+        log::info!("egraph stats: {:?}", pass.stats);
+        trace!("After egraph optimization:\n{}", self.func.display());
+        Ok(())
+    }
 }
diff --git a/cranelift/codegen/src/ctxhash.rs b/cranelift/codegen/src/ctxhash.rs
new file mode 100644
index 0000000000..e172d46c12
--- /dev/null
+++ b/cranelift/codegen/src/ctxhash.rs
@@ -0,0 +1,168 @@
+//! A hashmap with "external hashing": nodes are hashed or compared for
+//! equality only with some external context provided on lookup/insert.
+//! This allows very memory-efficient data structures where
+//! node-internal data references some other storage (e.g., offsets into
+//! an array or pool of shared data).
+
+use hashbrown::raw::RawTable;
+use std::hash::{Hash, Hasher};
+
+/// Trait that allows for equality comparison given some external
+/// context.
+///
+/// Note that this trait is implemented by the *context*, rather than
+/// the item type, for somewhat complex lifetime reasons (lack of GATs
+/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
+/// the value type).
+pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
+    /// Determine whether `a` and `b` are equal, interpreting both values
+    /// through the external context held in `self`.
+    fn ctx_eq(&self, a: &V1, b: &V2) -> bool;
+}
+
+/// Trait that allows for hashing given some external context.
+pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
+    /// Feed the hash of `value` into `state`, interpreting `value`
+    /// through the external context held in `self`.
+    fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value);
+}
+
+/// A context that carries no data, for underlying value types that
+/// already have `Eq` and `Hash`: it forwards directly to those impls.
+#[derive(Default)]
+pub struct NullCtx;
+
+impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
+    fn ctx_eq(&self, a: &V, b: &V) -> bool {
+        a.eq(b) // plain `Eq`; the context contributes nothing
+    }
+}
+impl<V: Eq + Hash> CtxHash<V> for NullCtx {
+    fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) {
+        value.hash(state); // plain `Hash`; the context contributes nothing
+    }
+}
+
+/// A bucket in the hash table.
+///
+/// Some performance-related design notes: we cache the hashcode for
+/// speed, as this often buys a few percent speed in
+/// interning-table-heavy workloads. We only keep the low 32 bits of
+/// the hashcode, for memory efficiency: in common use, `K` and `V`
+/// are often 32 bits also, and a 12-byte bucket is measurably better
+/// than a 16-byte bucket.
+struct BucketData<K, V> {
+    hash: u32, // cached low 32 bits of the key's hash
+    k: K,
+    v: V,
+}
+
+/// A HashMap that takes external context for all operations (hashing and equality come from that context).
+pub struct CtxHashMap<K, V> {
+    raw: RawTable<BucketData<K, V>>, // each bucket stores the key, the value, and the cached hash
+}
+
+impl<K, V> CtxHashMap<K, V> {
+    /// Create an empty map, forwarding `capacity` to the underlying
+    /// `RawTable` so that space is pre-allocated.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            raw: RawTable::with_capacity(capacity),
+        }
+    }
+}
+
+fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32
+where
+    Ctx: CtxHash<K>,
+{
+    let mut hasher = crate::fx::FxHasher::default();
+    ctx.ctx_hash(&mut hasher, k); // the context, not `K` itself, defines how `k` is hashed
+    hasher.finish() as u32 // deliberately truncated: buckets cache only the low 32 bits
+}
+
+impl<K, V> CtxHashMap<K, V> {
+    /// Insert a new key-value pair, returning the old value associated
+    /// with this key (if any).
+    pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V>
+    where
+        Ctx: CtxEq<K, K> + CtxHash<K>,
+    {
+        let hash = compute_hash(ctx, &k);
+        match self.raw.find(hash as u64, |bucket| {
+            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k) // cached hash first, then contextual equality
+        }) {
+            Some(bucket) => {
+                let data = unsafe { bucket.as_mut() }; // SAFETY: `bucket` was just returned by `find` on `self.raw`, held via `&mut self`
+                Some(std::mem::replace(&mut data.v, v)) // the stored key is kept; only the value is swapped
+            }
+            None => {
+                let data = BucketData { hash, k, v };
+                self.raw
+                    .insert_entry(hash as u64, data, |bucket| bucket.hash as u64); // rehashing reuses the cached hash
+                None
+            }
+        }
+    }
+
+    /// Look up `k` (hashed via `CtxHash<Q>`, compared via `CtxEq<K, Q>`), borrowing the value if present.
+    pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V>
+    where
+        Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,
+    {
+        let hash = compute_hash(ctx, k);
+        self.raw
+            .find(hash as u64, |bucket| {
+                hash == bucket.hash && ctx.ctx_eq(&bucket.k, k) // cached hash first, then contextual equality
+            })
+            .map(|bucket| {
+                let data = unsafe { bucket.as_ref() }; // SAFETY: `bucket` comes from `find` on `self.raw`, which stays borrowed for `'a`
+                &data.v
+            })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::hash::Hash;
+
+    #[derive(Clone, Copy, Debug)]
+    struct Key {
+        index: u32, // position in `Ctx::vals`; meaningless without the context
+    }
+    struct Ctx {
+        vals: &'static [&'static str],
+    }
+    impl CtxEq<Key, Key> for Ctx {
+        fn ctx_eq(&self, a: &Key, b: &Key) -> bool {
+            self.vals[a.index as usize].eq(self.vals[b.index as usize]) // compare the referenced strings, not the indices
+        }
+    }
+    impl CtxHash<Key> for Ctx {
+        fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) {
+            self.vals[value.index as usize].hash(state); // hash the referenced string, not the index
+        }
+    }
+
+    #[test]
+    fn test_basic() {
+        let ctx = Ctx {
+            vals: &["a", "b", "a"], // indices 0 and 2 refer to equal strings
+        };
+
+        let k0 = Key { index: 0 };
+        let k1 = Key { index: 1 };
+        let k2 = Key { index: 2 };
+
+        assert!(ctx.ctx_eq(&k0, &k2));
+        assert!(!ctx.ctx_eq(&k0, &k1));
+        assert!(!ctx.ctx_eq(&k2, &k1));
+
+        let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4);
+        assert_eq!(map.insert(k0, 42, &ctx), None);
+        assert_eq!(map.insert(k2, 84, &ctx), Some(42)); // `k2` is context-equal to `k0`, so it replaces 42
+        assert_eq!(map.get(&k1, &ctx), None);
+        assert_eq!(*map.get(&k0, &ctx).unwrap(), 84); // lookup through `k0` sees the value stored via `k2`
+    }
+}
diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs
index d8d625671b..69870d556e 100644
--- a/cranelift/codegen/src/egraph.rs
+++ b/cranelift/codegen/src/egraph.rs
@@ -1,342 +1,462 @@
-//! Egraph-based mid-end optimization framework.
+//! Support for egraphs represented in the DataFlowGraph.
 
+use crate::alias_analysis::{AliasAnalysis, LastStores};
+use crate::ctxhash::{CtxEq, CtxHash, CtxHashMap};
+use crate::cursor::{Cursor, CursorPosition, FuncCursor};
 use crate::dominator_tree::DominatorTree;
-use crate::egraph::stores::PackedMemoryState;
-use crate::flowgraph::ControlFlowGraph;
-use crate::loop_analysis::{LoopAnalysis, LoopLevel};
-use crate::trace;
-use crate::{
-    fx::{FxHashMap, FxHashSet},
-    inst_predicates::has_side_effect,
-    ir::{Block, Function, Inst, InstructionData, InstructionImms, Opcode, Type},
+use crate::egraph::domtree::DomTreeWithChildren;
+use crate::egraph::elaborate::Elaborator;
+use crate::fx::FxHashSet;
+use crate::inst_predicates::is_pure_for_egraph;
+use crate::ir::{
+    DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool,
 };
-use alloc::vec::Vec;
-use core::ops::Range;
-use cranelift_egraph::{EGraph, Id, Language, NewOrExisting};
-use cranelift_entity::EntityList;
+use crate::loop_analysis::LoopAnalysis;
+use crate::opts::generated_code::ContextIter;
+use crate::opts::IsleContext;
+use crate::trace;
+use crate::unionfind::UnionFind;
+use cranelift_entity::packed_option::ReservedValue;
 use cranelift_entity::SecondaryMap;
+use std::hash::Hasher;
 
+mod cost;
 mod domtree;
 mod elaborate;
-mod node;
-mod stores;
 
-use elaborate::Elaborator;
-pub use node::{Node, NodeCtx};
-pub use stores::{AliasAnalysis, MemoryState};
-
-pub struct FuncEGraph<'a> {
+/// Pass over a Function that runs the whole aegraph optimization pipeline:
+///
+/// - Removes non-skeleton nodes from the Layout.
+/// - Performs a GVN-and-rule-application pass over all Values
+///   reachable from the skeleton, potentially creating new Union
+///   nodes (i.e., an aegraph) so that some values have multiple
+///   representations.
+/// - Does "extraction" on the aegraph: selects the best value out of
+///   the tree-of-Union nodes for each used value.
+/// - Does "scoped elaboration" on the aegraph: chooses one or more
+///   locations for pure nodes to become instructions again in the
+///   layout, as forced by the skeleton.
+///
+/// At the beginning and end of this pass, the CLIF should be in a
+/// state that passes the verifier and, additionally, has no Union
+/// nodes. During the pass, Union nodes may exist, and instructions in
+/// the layout may refer to results of instructions that are not
+/// placed in the layout.
+pub struct EgraphPass<'a> {
+    /// The function we're operating on.
+    func: &'a mut Function,
     /// Dominator tree, used for elaboration pass.
     domtree: &'a DominatorTree,
-    /// Loop analysis results, used for built-in LICM during elaboration.
+    /// Alias analysis, used during optimization.
+    alias_analysis: &'a mut AliasAnalysis<'a>,
+    /// "Domtree with children": like `domtree`, but with an explicit
+    /// list of children, rather than just parent pointers.
+    domtree_children: DomTreeWithChildren,
+    /// Loop analysis results, used for built-in LICM during
+    /// elaboration.
     loop_analysis: &'a LoopAnalysis,
-    /// Last-store tracker for integrated alias analysis during egraph build.
-    alias_analysis: AliasAnalysis,
-    /// The egraph itself.
-    pub(crate) egraph: EGraph<NodeCtx, Analysis>,
-    /// "node context", containing arenas for node data.
-    pub(crate) node_ctx: NodeCtx,
-    /// Ranges in `side_effect_ids` for sequences of side-effecting
-    /// eclasses per block.
-    side_effects: SecondaryMap<Block, Range<u32>>,
-    side_effect_ids: Vec<Id>,
-    /// Map from store instructions to their nodes; used for store-to-load forwarding.
-    pub(crate) store_nodes: FxHashMap<Inst, (Type, Id)>,
-    /// Ranges in `blockparam_ids_tys` for sequences of blockparam
-    /// eclass IDs and types per block.
-    blockparams: SecondaryMap<Block, Range<u32>>,
-    blockparam_ids_tys: Vec<(Id, Type)>,
-    /// Which canonical node IDs do we want to rematerialize in each
+    /// Which canonical Values do we want to rematerialize in each
     /// block where they're used?
-    pub(crate) remat_ids: FxHashSet<Id>,
-    /// Which canonical node IDs have an enode whose value subsumes
-    /// all others it's unioned with?
-    pub(crate) subsume_ids: FxHashSet<Id>,
-    /// Statistics recorded during the process of building,
-    /// optimizing, and lowering out of this egraph.
+    ///
+    /// (A canonical Value is the *oldest* Value in an eclass,
+    /// i.e. tree of union value-nodes).
+    remat_values: FxHashSet<Value>,
+    /// Stats collected while we run this pass.
     pub(crate) stats: Stats,
-    /// Current rewrite-recursion depth. Used to enforce a finite
-    /// limit on rewrite rule application so that we don't get stuck
-    /// in an infinite chain.
+    /// Union-find that maps all members of a Union tree (eclass) back
+    /// to the *oldest* (lowest-numbered) `Value`.
+    eclasses: UnionFind<Value>,
+}
+
+/// Context passed through node insertion and optimization.
+pub(crate) struct OptimizeCtx<'opt, 'analysis>
+where
+    'analysis: 'opt,
+{
+    // Borrowed from EgraphPass:
+    pub(crate) func: &'opt mut Function,
+    pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>, // original value -> latest optimized value
+    pub(crate) gvn_map: &'opt mut CtxHashMap<(Type, InstructionData), Value>, // GVN dedup table
+    pub(crate) eclasses: &'opt mut UnionFind<Value>,
+    pub(crate) remat_values: &'opt mut FxHashSet<Value>,
+    pub(crate) stats: &'opt mut Stats,
+    pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
+    pub(crate) alias_analysis_state: &'opt mut LastStores,
+    // Held locally during optimization of one node (recursively):
     pub(crate) rewrite_depth: usize,
+    pub(crate) subsume_values: FxHashSet<Value>, // values marked as subsuming the rest of their eclass
 }
 
-#[derive(Clone, Debug, Default)]
-pub(crate) struct Stats {
-    pub(crate) node_created: u64,
-    pub(crate) node_param: u64,
-    pub(crate) node_result: u64,
-    pub(crate) node_pure: u64,
-    pub(crate) node_inst: u64,
-    pub(crate) node_load: u64,
-    pub(crate) node_dedup_query: u64,
-    pub(crate) node_dedup_hit: u64,
-    pub(crate) node_dedup_miss: u64,
-    pub(crate) node_ctor_created: u64,
-    pub(crate) node_ctor_deduped: u64,
-    pub(crate) node_union: u64,
-    pub(crate) node_subsume: u64,
-    pub(crate) store_map_insert: u64,
-    pub(crate) side_effect_nodes: u64,
-    pub(crate) rewrite_rule_invoked: u64,
-    pub(crate) rewrite_depth_limit: u64,
-    pub(crate) store_to_load_forward: u64,
-    pub(crate) elaborate_visit_node: u64,
-    pub(crate) elaborate_memoize_hit: u64,
-    pub(crate) elaborate_memoize_miss: u64,
-    pub(crate) elaborate_memoize_miss_remat: u64,
-    pub(crate) elaborate_licm_hoist: u64,
-    pub(crate) elaborate_func: u64,
-    pub(crate) elaborate_func_pre_insts: u64,
-    pub(crate) elaborate_func_post_insts: u64,
+/// For passing to `insert_pure_enode`. Sometimes the enode already
+/// exists as an Inst (from the original CLIF), and sometimes we're in
+/// the middle of creating it and want to avoid inserting it if
+/// possible until we know we need it.
+pub(crate) enum NewOrExistingInst {
+    New(InstructionData, Type), // not yet in the DFG; `Type` is the controlling typevar used to make its result
+    Existing(Inst), // already present in the DFG
 }
 
-impl<'a> FuncEGraph<'a> {
-    /// Create a new EGraph for the given function. Requires the
-    /// domtree to be precomputed as well; the domtree is used for
-    /// scheduling when lowering out of the egraph.
-    pub fn new(
-        func: &Function,
-        domtree: &'a DominatorTree,
-        loop_analysis: &'a LoopAnalysis,
-        cfg: &ControlFlowGraph,
-    ) -> FuncEGraph<'a> {
-        let num_values = func.dfg.num_values();
-        let num_blocks = func.dfg.num_blocks();
-        let node_count_estimate = num_values * 2;
-        let alias_analysis = AliasAnalysis::new(func, cfg);
-        let mut this = Self {
-            domtree,
-            loop_analysis,
-            alias_analysis,
-            egraph: EGraph::with_capacity(node_count_estimate, Some(Analysis)),
-            node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg),
-            side_effects: SecondaryMap::with_capacity(num_blocks),
-            side_effect_ids: Vec::with_capacity(node_count_estimate),
-            store_nodes: FxHashMap::default(),
-            blockparams: SecondaryMap::with_capacity(num_blocks),
-            blockparam_ids_tys: Vec::with_capacity(num_blocks * 10),
-            remat_ids: FxHashSet::default(),
-            subsume_ids: FxHashSet::default(),
-            stats: Default::default(),
-            rewrite_depth: 0,
+impl NewOrExistingInst {
+    /// Builds the GVN lookup key for this instruction: its controlling
+    /// type variable paired with its `InstructionData`. For an existing
+    /// instruction, both are read from `dfg`.
+    fn get_inst_key(&self, dfg: &DataFlowGraph) -> (Type, InstructionData) {
+        match *self {
+            NewOrExistingInst::New(data, ty) => (ty, data),
+            NewOrExistingInst::Existing(inst) => (dfg.ctrl_typevar(inst), dfg[inst]),
+        }
+    }
+}
+
+impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis>
+where
+    'analysis: 'opt,
+{
+    /// Optimization of a single instruction.
+    ///
+    /// This does a few things:
+    /// - Looks up the instruction in the GVN deduplication map. If we
+    ///   already have the same instruction somewhere else, with the
+    ///   same args, then we can alias the original instruction's
+    ///   results and omit this instruction entirely.
+    ///   - Note that we do this canonicalization based on the
+    ///     instruction with its arguments as *canonical* eclass IDs,
+    ///     that is, the oldest (smallest index) `Value` reachable in
+    ///     the tree-of-unions (whole eclass). This ensures that we
+    ///     properly canonicalize newer nodes that use newer "versions"
+    ///     of a value that are still equal to the older versions.
+    /// - If the instruction is "new" (not deduplicated), then apply
+    ///   optimization rules:
+    ///   - All of the mid-end rules written in ISLE.
+    ///   - Store-to-load forwarding.
+    /// - Update the value-to-opt-value map, and update the eclass
+    ///   union-find, if we rewrote the value to different form(s).
+    pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
+        // Create the external context for looking up and updating the
+        // GVN map. This is necessary so that instructions themselves
+        // do not have to carry all the references or data for a full
+        // `Eq` or `Hash` impl.
+        let gvn_context = GVNContext {
+            union_find: self.eclasses,
+            value_lists: &self.func.dfg.value_lists,
         };
-        this.store_nodes.reserve(func.dfg.num_values() / 8);
-        this.remat_ids.reserve(func.dfg.num_values() / 4);
-        this.subsume_ids.reserve(func.dfg.num_values() / 4);
-        this.build(func);
-        this
+
+        self.stats.pure_inst += 1;
+        if let NewOrExistingInst::New(..) = inst {
+            self.stats.new_inst += 1;
+        }
+
+        // Does this instruction already exist? If so, add entries to
+        // the value-map to rewrite uses of its results to the results
+        // of the original (existing) instruction. If not, optimize
+        // the new instruction.
+        if let Some(&orig_result) = self
+            .gvn_map
+            .get(&inst.get_inst_key(&self.func.dfg), &gvn_context)
+        {
+            self.stats.pure_inst_deduped += 1;
+            if let NewOrExistingInst::Existing(inst) = inst {
+                debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1);
+                let result = self.func.dfg.first_result(inst);
+                self.value_to_opt_value[result] = orig_result;
+                self.eclasses.union(result, orig_result);
+                self.stats.union += 1;
+                result // note: returns this inst's own result; the redirect lives in `value_to_opt_value`
+            } else {
+                orig_result // nothing was inserted for the `New` case; reuse the existing value
+            }
+        } else {
+            // Now actually insert the InstructionData and attach
+            // result value (exactly one).
+            let (inst, result, ty) = match inst {
+                NewOrExistingInst::New(data, typevar) => {
+                    let inst = self.func.dfg.make_inst(data);
+                    // TODO: reuse return value?
+                    self.func.dfg.make_inst_results(inst, typevar);
+                    let result = self.func.dfg.first_result(inst);
+                    // Add to eclass unionfind.
+                    self.eclasses.add(result);
+                    // New inst. We need to do the analysis of its result.
+                    (inst, result, typevar)
+                }
+                NewOrExistingInst::Existing(inst) => {
+                    let result = self.func.dfg.first_result(inst);
+                    let ty = self.func.dfg.ctrl_typevar(inst);
+                    (inst, result, ty)
+                }
+            };
+
+            let opt_value = self.optimize_pure_enode(inst);
+            let gvn_context = GVNContext { // rebuilt: the earlier context's borrows cannot span the `&mut self` calls above
+                union_find: self.eclasses,
+                value_lists: &self.func.dfg.value_lists,
+            };
+            self.gvn_map
+                .insert((ty, self.func.dfg[inst].clone()), opt_value, &gvn_context);
+            self.value_to_opt_value[result] = opt_value;
+            opt_value
+        }
     }
 
-    fn build(&mut self, func: &Function) {
-        // Mapping of SSA `Value` to eclass ID.
-        let mut value_to_id = FxHashMap::default();
+    /// Optimizes an enode by applying any matching mid-end rewrite
+    /// rules (or store-to-load forwarding, which is a special case),
+    /// unioning together all possible optimized (or rewritten) forms
+    /// of this expression into an eclass and returning the `Value`
+    /// that represents that eclass.
+    fn optimize_pure_enode(&mut self, inst: Inst) -> Value {
+        // A pure node always has exactly one result.
+        let orig_value = self.func.dfg.first_result(inst);
 
-        // For each block in RPO, create an enode for block entry, for
-        // each block param, and for each instruction.
-        for &block in self.domtree.cfg_postorder().iter().rev() {
-            let loop_level = self.loop_analysis.loop_level(block);
-            let blockparam_start =
-                u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count");
-            for (i, &value) in func.dfg.block_params(block).iter().enumerate() {
-                let ty = func.dfg.value_type(value);
-                let param = self
-                    .egraph
-                    .add(
-                        Node::Param {
-                            block,
-                            index: i
-                                .try_into()
-                                .expect("blockparam index should fit in Node::Param"),
-                            ty,
-                            loop_level,
-                        },
-                        &mut self.node_ctx,
-                    )
-                    .get();
-                value_to_id.insert(value, param);
-                self.blockparam_ids_tys.push((param, ty));
-                self.stats.node_created += 1;
-                self.stats.node_param += 1;
-            }
-            let blockparam_end =
-                u32::try_from(self.blockparam_ids_tys.len()).expect("Overflow in blockparam count");
-            self.blockparams[block] = blockparam_start..blockparam_end;
+        let mut isle_ctx = IsleContext { ctx: self };
 
-            let side_effect_start =
-                u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count");
-            for inst in func.layout.block_insts(block) {
-                // Build args from SSA values.
-                let args = EntityList::from_iter(
-                    func.dfg.inst_args(inst).iter().map(|&arg| {
-                        let arg = func.dfg.resolve_aliases(arg);
-                        *value_to_id
-                            .get(&arg)
-                            .expect("Must have seen def before this use")
-                    }),
-                    &mut self.node_ctx.args,
+        // Limit rewrite depth. When we apply optimization rules, they
+        // may create new nodes (values) and those are, recursively,
+        // optimized eagerly as soon as they are created. So we may
+        // have more than one ISLE invocation on the stack. (This is
+        // necessary so that as the toplevel builds the
+        // right-hand-side expression bottom-up, it uses the "latest"
+        // optimized values for all the constituent parts.) To avoid
+        // infinite or problematic recursion, we bound the rewrite
+        // depth to a small constant here.
+        const REWRITE_LIMIT: usize = 5;
+        if isle_ctx.ctx.rewrite_depth > REWRITE_LIMIT { // note `>`: depths 0..=REWRITE_LIMIT still proceed
+            isle_ctx.ctx.stats.rewrite_depth_limit += 1;
+            return orig_value; // depth was not incremented yet, so there is nothing to undo
+        }
+        isle_ctx.ctx.rewrite_depth += 1;
+
+        // Invoke the ISLE toplevel constructor, getting all new
+        // values produced as equivalents to this value.
+        trace!("Calling into ISLE with original value {}", orig_value);
+        isle_ctx.ctx.stats.rewrite_rule_invoked += 1;
+        let optimized_values =
+            crate::opts::generated_code::constructor_simplify(&mut isle_ctx, orig_value);
+
+        // Create a union of all new values with the original (or
+        // maybe just one new value marked as "subsuming" the
+        // original, if present.)
+        let mut union_value = orig_value;
+        if let Some(mut optimized_values) = optimized_values {
+            while let Some(optimized_value) = optimized_values.next(&mut isle_ctx) {
+                trace!(
+                    "Returned from ISLE for {}, got {:?}",
+                    orig_value,
+                    optimized_value
                 );
+                if optimized_value == orig_value {
+                    trace!(" -> same as orig value; skipping");
+                    continue;
+                }
+                if isle_ctx.ctx.subsume_values.contains(&optimized_value) {
+                    // Merge in the unionfind so canonicalization
+                    // still works, but take *only* the subsuming
+                    // value, and break now.
+                    isle_ctx.ctx.eclasses.union(optimized_value, union_value);
+                    union_value = optimized_value;
+                    break; // falls through to the depth decrement below
+                }
 
-                let results = func.dfg.inst_results(inst);
-                let ty = if results.len() == 1 {
-                    func.dfg.value_type(results[0])
+                let old_union_value = union_value;
+                union_value = isle_ctx
+                    .ctx
+                    .func
+                    .dfg
+                    .union(old_union_value, optimized_value);
+                isle_ctx.ctx.stats.union += 1;
+                trace!(" -> union: now {}", union_value);
+                isle_ctx.ctx.eclasses.add(union_value);
+                isle_ctx
+                    .ctx
+                    .eclasses
+                    .union(old_union_value, optimized_value);
+                isle_ctx.ctx.eclasses.union(old_union_value, union_value);
+            }
+        }
+
+        isle_ctx.ctx.rewrite_depth -= 1;
+
+        union_value
+    }
+
+    /// Optimize a "skeleton" instruction via alias analysis. Returns
+    /// `true` if the instruction should be removed from the layout;
+    /// the removal itself is left to the caller.
+    fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool {
+        self.stats.skeleton_inst += 1;
+        // Not pure, but may still be a load or store:
+        // process it to see if we can optimize it.
+        if let Some(new_result) =
+            self.alias_analysis
+                .process_inst(self.func, self.alias_analysis_state, inst)
+        {
+            self.stats.alias_analysis_removed += 1;
+            let result = self.func.dfg.first_result(inst);
+            self.value_to_opt_value[result] = new_result; // redirect this result to the value alias analysis returned
+            true
+        } else {
+            // Set all results to identity-map to themselves
+            // in the value-to-opt-value map.
+            for &result in self.func.dfg.inst_results(inst) {
+                self.value_to_opt_value[result] = result;
+                self.eclasses.add(result);
+            }
+            false
+        }
+    }
+}
+
+impl<'a> EgraphPass<'a> {
+    /// Create a new EgraphPass over `func`, building the domtree-with-children from `domtree` up front.
+    pub fn new(
+        func: &'a mut Function,
+        domtree: &'a DominatorTree,
+        loop_analysis: &'a LoopAnalysis,
+        alias_analysis: &'a mut AliasAnalysis<'a>,
+    ) -> Self {
+        let num_values = func.dfg.num_values(); // read before `func` is moved into the struct
+        let domtree_children = DomTreeWithChildren::new(func, domtree);
+        Self {
+            func,
+            domtree,
+            domtree_children,
+            loop_analysis,
+            alias_analysis,
+            stats: Stats::default(),
+            eclasses: UnionFind::with_capacity(num_values), // sized for the values that exist at construction
+            remat_values: FxHashSet::default(),
+        }
+    }
+
+    /// Run the pass: remove and optimize pure nodes, then elaborate the result.
+    pub fn run(&mut self) {
+        self.remove_pure_and_optimize();
+
+        trace!("egraph built:\n{}\n", self.func.display());
+        if cfg!(feature = "trace-log") { // skip the per-value dump unless the `trace-log` feature is enabled
+            for (value, def) in self.func.dfg.values_and_defs() {
+                trace!(" -> {} = {:?}", value, def);
+                match def {
+                    ValueDef::Result(i, 0) => { // matches only result index 0 of an instruction
+                        trace!("  -> {} = {:?}", i, self.func.dfg[i]);
+                    }
+                    _ => {}
+                }
+            }
+        }
+        trace!("stats: {:?}", self.stats);
+        self.elaborate();
+    }
+
+    /// Remove pure nodes from the `Layout` of the function, ensuring
+    /// that only the "side-effect skeleton" remains, and also
+    /// optimize the pure nodes. This is the first step of
+    /// egraph-based processing and turns the pure CFG-based CLIF into
+    /// a CFG skeleton with a sea of (optimized) nodes tying it
+    /// together.
+    ///
+    /// As we walk through the code, we eagerly apply optimization
+    /// rules; at any given point we have a "latest version" of an
+    /// eclass of possible representations for a `Value` in the
+    /// original program, which is itself a `Value` at the root of a
+    /// union-tree. We keep a map from the original values to these
+    /// optimized values. When we encounter any instruction (pure or
+    /// side-effecting skeleton) we rewrite its arguments to capture
+    /// the "latest" optimized forms of these values. (We need to do
+    /// this as part of this pass, and not later using a finished map,
+    /// because the eclass can continue to be updated and we need to
+    /// only refer to its subset that exists at this stage, to
+    /// maintain acyclicity.)
+    fn remove_pure_and_optimize(&mut self) {
+        let mut cursor = FuncCursor::new(self.func);
+        let mut value_to_opt_value: SecondaryMap<Value, Value> =
+            SecondaryMap::with_default(Value::reserved_value());
+        let mut gvn_map: CtxHashMap<(Type, InstructionData), Value> =
+            CtxHashMap::with_capacity(cursor.func.dfg.num_values());
+
+        // In domtree preorder, visit blocks. (TODO: factor out an
+        // iterator from this and elaborator.)
+        let root = self.domtree_children.root();
+        let mut block_stack = vec![root];
+        while let Some(block) = block_stack.pop() {
+            // We popped this block; push children
+            // immediately, then process this block.
+            block_stack.extend(self.domtree_children.children(block));
+
+            trace!("Processing block {}", block);
+            cursor.set_position(CursorPosition::Before(block));
+
+            let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);
+
+            for &param in cursor.func.dfg.block_params(block) {
+                trace!("creating initial singleton eclass for blockparam {}", param);
+                self.eclasses.add(param);
+                value_to_opt_value[param] = param;
+            }
+            while let Some(inst) = cursor.next_inst() {
+                trace!("Processing inst {}", inst);
+
+                // While we're passing over all insts, create initial
+                // singleton eclasses for all result and blockparam
+                // values.  Also do initial analysis of all inst
+                // results.
+                for &result in cursor.func.dfg.inst_results(inst) {
+                    trace!("creating initial singleton eclass for {}", result);
+                    self.eclasses.add(result);
+                }
+
+                // Rewrite args of *all* instructions using the
+                // value-to-opt-value map.
+                cursor.func.dfg.resolve_aliases_in_arguments(inst);
+                for arg in cursor.func.dfg.inst_args_mut(inst) {
+                    let new_value = value_to_opt_value[*arg];
+                    trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
+                    debug_assert_ne!(new_value, Value::reserved_value());
+                    *arg = new_value;
+                }
+
+                // Build a context for optimization, with borrows of
+                // state. We can't invoke a method on `self` because
+                // we've borrowed `self.func` mutably (as
+                // `cursor.func`) so we pull apart the pieces instead
+                // here.
+                let mut ctx = OptimizeCtx {
+                    func: cursor.func,
+                    value_to_opt_value: &mut value_to_opt_value,
+                    gvn_map: &mut gvn_map,
+                    eclasses: &mut self.eclasses,
+                    rewrite_depth: 0,
+                    subsume_values: FxHashSet::default(),
+                    remat_values: &mut self.remat_values,
+                    stats: &mut self.stats,
+                    alias_analysis: self.alias_analysis,
+                    alias_analysis_state: &mut alias_analysis_state,
+                };
+
+                if is_pure_for_egraph(ctx.func, inst) {
+                    // Insert into GVN map and optimize any new nodes
+                    // inserted (recursively performing this work for
+                    // any nodes the optimization rules produce).
+                    let inst = NewOrExistingInst::Existing(inst);
+                    ctx.insert_pure_enode(inst);
+                    // We've now rewritten all uses, or will when we
+                    // see them, and the instruction exists as a pure
+                    // enode in the eclass, so we can remove it.
+                    cursor.remove_inst_and_step_back();
                 } else {
-                    crate::ir::types::INVALID
-                };
-
-                let load_mem_state = self.alias_analysis.get_state_for_load(inst);
-                let is_readonly_load = match func.dfg[inst] {
-                    InstructionData::Load {
-                        opcode: Opcode::Load,
-                        flags,
-                        ..
-                    } => flags.readonly() && flags.notrap(),
-                    _ => false,
-                };
-
-                // Create the egraph node.
-                let op = InstructionImms::from(&func.dfg[inst]);
-                let opcode = op.opcode();
-                let srcloc = func.srclocs[inst];
-                let arity = u16::try_from(results.len())
-                    .expect("More than 2^16 results from an instruction");
-
-                let node = if is_readonly_load {
-                    self.stats.node_created += 1;
-                    self.stats.node_pure += 1;
-                    Node::Pure {
-                        op,
-                        args,
-                        ty,
-                        arity,
-                    }
-                } else if let Some(load_mem_state) = load_mem_state {
-                    let addr = args.as_slice(&self.node_ctx.args)[0];
-                    trace!("load at inst {} has mem state {:?}", inst, load_mem_state);
-                    self.stats.node_created += 1;
-                    self.stats.node_load += 1;
-                    Node::Load {
-                        op,
-                        ty,
-                        addr,
-                        mem_state: load_mem_state,
-                        srcloc,
-                    }
-                } else if has_side_effect(func, inst) || opcode.can_load() {
-                    self.stats.node_created += 1;
-                    self.stats.node_inst += 1;
-                    Node::Inst {
-                        op,
-                        args,
-                        ty,
-                        arity,
-                        srcloc,
-                        loop_level,
-                    }
-                } else {
-                    self.stats.node_created += 1;
-                    self.stats.node_pure += 1;
-                    Node::Pure {
-                        op,
-                        args,
-                        ty,
-                        arity,
-                    }
-                };
-                let dedup_needed = self.node_ctx.needs_dedup(&node);
-                let is_pure = matches!(node, Node::Pure { .. });
-
-                let mut id = self.egraph.add(node, &mut self.node_ctx);
-
-                if dedup_needed {
-                    self.stats.node_dedup_query += 1;
-                    match id {
-                        NewOrExisting::New(_) => {
-                            self.stats.node_dedup_miss += 1;
-                        }
-                        NewOrExisting::Existing(_) => {
-                            self.stats.node_dedup_hit += 1;
-                        }
-                    }
-                }
-
-                if opcode == Opcode::Store {
-                    let store_data_ty = func.dfg.value_type(func.dfg.inst_args(inst)[0]);
-                    self.store_nodes.insert(inst, (store_data_ty, id.get()));
-                    self.stats.store_map_insert += 1;
-                }
-
-                // Loads that did not already merge into an existing
-                // load: try to forward from a store (store-to-load
-                // forwarding).
-                if let NewOrExisting::New(new_id) = id {
-                    if load_mem_state.is_some() {
-                        let opt_id = crate::opts::store_to_load(new_id, self);
-                        trace!("store_to_load: {} -> {}", new_id, opt_id);
-                        if opt_id != new_id {
-                            id = NewOrExisting::Existing(opt_id);
-                        }
-                    }
-                }
-
-                // Now either optimize (for new pure nodes), or add to
-                // the side-effecting list (for all other new nodes).
-                let id = match id {
-                    NewOrExisting::Existing(id) => id,
-                    NewOrExisting::New(id) if is_pure => {
-                        // Apply all optimization rules immediately; the
-                        // aegraph (acyclic egraph) works best when we do
-                        // this so all uses pick up the eclass with all
-                        // possible enodes.
-                        crate::opts::optimize_eclass(id, self)
-                    }
-                    NewOrExisting::New(id) => {
-                        self.side_effect_ids.push(id);
-                        self.stats.side_effect_nodes += 1;
-                        id
-                    }
-                };
-
-                // Create results and save in Value->Id map.
-                match results {
-                    &[] => {}
-                    &[one_result] => {
-                        trace!("build: value {} -> id {}", one_result, id);
-                        value_to_id.insert(one_result, id);
-                    }
-                    many_results => {
-                        debug_assert!(many_results.len() > 1);
-                        for (i, &result) in many_results.iter().enumerate() {
-                            let ty = func.dfg.value_type(result);
-                            let projection = self
-                                .egraph
-                                .add(
-                                    Node::Result {
-                                        value: id,
-                                        result: i,
-                                        ty,
-                                    },
-                                    &mut self.node_ctx,
-                                )
-                                .get();
-                            self.stats.node_created += 1;
-                            self.stats.node_result += 1;
-                            trace!("build: value {} -> id {}", result, projection);
-                            value_to_id.insert(result, projection);
-                        }
+                    if ctx.optimize_skeleton_inst(inst) {
+                        cursor.remove_inst_and_step_back();
                     }
                 }
             }
-
-            let side_effect_end =
-                u32::try_from(self.side_effect_ids.len()).expect("Overflow in side-effect count");
-            let side_effect_range = side_effect_start..side_effect_end;
-            self.side_effects[block] = side_effect_range;
         }
     }
 
     /// Scoped elaboration: compute a final ordering of op computation
-    /// for each block and replace the given Func body.
+    /// for each block and update the given Func body. After this
+    /// runs, the function body is back in the state where every
+    /// Inst with a used result is placed in the layout (possibly
+    /// duplicated, if our code-motion logic decides this is the best
+    /// option).
     ///
     /// This works in concert with the domtree. We do a preorder
     /// traversal of the domtree, tracking a scoped map from Id to
@@ -354,76 +474,95 @@ impl<'a> FuncEGraph<'a> {
     /// thus computed "as late as possible", but then memoized into
     /// the Id-to-Value map and available to all dominated blocks and
     /// for the rest of this block. (This subsumes GVN.)
-    pub fn elaborate(&mut self, func: &mut Function) {
-        let mut elab = Elaborator::new(
-            func,
+    fn elaborate(&mut self) {
+        let mut elaborator = Elaborator::new(
+            self.func,
             self.domtree,
+            &self.domtree_children,
             self.loop_analysis,
-            &self.egraph,
-            &self.node_ctx,
-            &self.remat_ids,
+            &mut self.remat_values,
+            &mut self.eclasses,
             &mut self.stats,
         );
-        elab.elaborate(
-            |block| {
-                let blockparam_range = self.blockparams[block].clone();
-                &self.blockparam_ids_tys
-                    [blockparam_range.start as usize..blockparam_range.end as usize]
-            },
-            |block| {
-                let side_effect_range = self.side_effects[block].clone();
-                &self.side_effect_ids
-                    [side_effect_range.start as usize..side_effect_range.end as usize]
-            },
-        );
+        elaborator.elaborate();
+
+        self.check_post_egraph();
     }
-}
 
-/// State for egraph analysis that computes all needed properties.
-pub(crate) struct Analysis;
-
-/// Analysis results for each eclass id.
-#[derive(Clone, Debug)]
-pub(crate) struct AnalysisValue {
-    pub(crate) loop_level: LoopLevel,
-}
-
-impl Default for AnalysisValue {
-    fn default() -> Self {
-        Self {
-            loop_level: LoopLevel::root(),
+    #[cfg(debug_assertions)]
+    fn check_post_egraph(&self) {
+        // Verify that no union nodes are reachable from inst args,
+        // and that all inst args' defining instructions are in the
+        // layout.
+        for block in self.func.layout.blocks() {
+            for inst in self.func.layout.block_insts(block) {
+                for &arg in self.func.dfg.inst_args(inst) {
+                    match self.func.dfg.value_def(arg) {
+                        ValueDef::Result(i, _) => {
+                            debug_assert!(self.func.layout.inst_block(i).is_some());
+                        }
+                        ValueDef::Union(..) => {
+                            panic!("egraph union node {} still reachable at {}!", arg, inst);
+                        }
+                        _ => {}
+                    }
+                }
+            }
         }
     }
+
+    #[cfg(not(debug_assertions))]
+    fn check_post_egraph(&self) {}
 }
 
-impl cranelift_egraph::Analysis for Analysis {
-    type L = NodeCtx;
-    type Value = AnalysisValue;
+/// Implementation of external-context equality and hashing on
+/// InstructionData. This allows us to deduplicate instructions given
+/// some context that lets us see its value lists and the mapping from
+/// any value to "canonical value" (in an eclass).
+struct GVNContext<'a> {
+    value_lists: &'a ValueListPool,
+    union_find: &'a UnionFind<Value>,
+}
 
-    fn for_node(
+impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> {
+    fn ctx_eq(
         &self,
-        ctx: &NodeCtx,
-        n: &Node,
-        values: &SecondaryMap<Id, AnalysisValue>,
-    ) -> AnalysisValue {
-        let loop_level = match n {
-            &Node::Pure { ref args, .. } => args
-                .as_slice(&ctx.args)
-                .iter()
-                .map(|&arg| values[arg].loop_level)
-                .max()
-                .unwrap_or(LoopLevel::root()),
-            &Node::Load { addr, .. } => values[addr].loop_level,
-            &Node::Result { value, .. } => values[value].loop_level,
-            &Node::Inst { loop_level, .. } | &Node::Param { loop_level, .. } => loop_level,
-        };
-
-        AnalysisValue { loop_level }
-    }
-
-    fn meet(&self, _ctx: &NodeCtx, v1: &AnalysisValue, v2: &AnalysisValue) -> AnalysisValue {
-        AnalysisValue {
-            loop_level: std::cmp::max(v1.loop_level, v2.loop_level),
-        }
+        (a_ty, a_inst): &(Type, InstructionData),
+        (b_ty, b_inst): &(Type, InstructionData),
+    ) -> bool {
+        a_ty == b_ty
+            && a_inst.eq(b_inst, self.value_lists, |value| {
+                self.union_find.find(value)
+            })
     }
 }
+
+impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> {
+    fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) {
+        std::hash::Hash::hash(&ty, state);
+        inst.hash(state, self.value_lists, |value| self.union_find.find(value));
+    }
+}
+
+/// Statistics collected during egraph-based processing.
+#[derive(Clone, Debug, Default)]
+pub(crate) struct Stats {
+    pub(crate) pure_inst: u64,
+    pub(crate) pure_inst_deduped: u64,
+    pub(crate) skeleton_inst: u64,
+    pub(crate) alias_analysis_removed: u64,
+    pub(crate) new_inst: u64,
+    pub(crate) union: u64,
+    pub(crate) subsume: u64,
+    pub(crate) remat: u64,
+    pub(crate) rewrite_rule_invoked: u64,
+    pub(crate) rewrite_depth_limit: u64,
+    pub(crate) elaborate_visit_node: u64,
+    pub(crate) elaborate_memoize_hit: u64,
+    pub(crate) elaborate_memoize_miss: u64,
+    pub(crate) elaborate_memoize_miss_remat: u64,
+    pub(crate) elaborate_licm_hoist: u64,
+    pub(crate) elaborate_func: u64,
+    pub(crate) elaborate_func_pre_insts: u64,
+    pub(crate) elaborate_func_post_insts: u64,
+}
diff --git a/cranelift/codegen/src/egraph/cost.rs b/cranelift/codegen/src/egraph/cost.rs
new file mode 100644
index 0000000000..8a9f852818
--- /dev/null
+++ b/cranelift/codegen/src/egraph/cost.rs
@@ -0,0 +1,97 @@
+//! Cost functions for egraph representation.
+
+use crate::ir::Opcode;
+
+/// A cost of computing some value in the program.
+///
+/// Costs are measured in an arbitrary unit that we represent in a
+/// `u32`. The ordering is meant to be meaningful, but the value of a
+/// single unit is arbitrary (and "not to scale"). We use a collection
+/// of heuristics to try to make this approximation at least usable.
+///
+/// We start by defining costs for each opcode (see `pure_op_cost`
+/// below). The cost of computing some value, initially, is the cost
+/// of its opcode, plus the cost of computing its inputs.
+///
+/// We then adjust the cost according to loop nests: for each
+/// loop-nest level, we multiply by 1024. Because we only have 32
+/// bits, we limit this scaling to a loop-level of two (i.e., multiply
+/// by 2^20 ~= 1M).
+///
+/// Arithmetic on costs is always saturating: we don't want to wrap
+/// around and return to a tiny cost when adding the costs of two very
+/// expensive operations. It is better to approximate and lose some
+/// precision than to lose the ordering by wrapping.
+///
+/// Finally, we reserve the highest value, `u32::MAX`, as a sentinel
+/// that means "infinite". This is separate from the finite costs and
+/// not reachable by doing arithmetic on them (even when overflowing)
+/// -- we saturate just *below* infinity. (This is done by the
+/// `finite()` method.) An infinite cost is used to represent a value
+/// that cannot be computed, or otherwise serve as a sentinel when
+/// performing search for the lowest-cost representation of a value.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub(crate) struct Cost(u32);
+impl Cost {
+    pub(crate) fn at_level(&self, loop_level: usize) -> Cost {
+        let loop_level = std::cmp::min(2, loop_level);
+        let multiplier = 1u32 << ((10 * loop_level) as u32);
+        Cost(self.0.saturating_mul(multiplier)).finite()
+    }
+
+    pub(crate) fn infinity() -> Cost {
+        // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost`
+        // only for heuristics and always saturate so this suffices!)
+        Cost(u32::MAX)
+    }
+
+    pub(crate) fn zero() -> Cost {
+        Cost(0)
+    }
+
+    /// Clamp this cost at a "finite" value. Can be used in
+    /// conjunction with saturating ops to avoid saturating into
+    /// `infinity()`.
+    fn finite(self) -> Cost {
+        Cost(std::cmp::min(u32::MAX - 1, self.0))
+    }
+}
+
+impl std::default::Default for Cost {
+    fn default() -> Cost {
+        Cost::zero()
+    }
+}
+
+impl std::ops::Add<Cost> for Cost {
+    type Output = Cost;
+    fn add(self, other: Cost) -> Cost {
+        Cost(self.0.saturating_add(other.0)).finite()
+    }
+}
+
+/// Return the cost of a *pure* opcode. Caller is responsible for
+/// checking that the opcode came from an instruction that satisfies
+/// `inst_predicates::is_pure_for_egraph()`.
+pub(crate) fn pure_op_cost(op: Opcode) -> Cost {
+    match op {
+        // Constants.
+        Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost(0),
+        // Extends/reduces.
+        Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => {
+            Cost(1)
+        }
+        // "Simple" arithmetic.
+        Opcode::Iadd
+        | Opcode::Isub
+        | Opcode::Band
+        | Opcode::BandNot
+        | Opcode::Bor
+        | Opcode::BorNot
+        | Opcode::Bxor
+        | Opcode::BxorNot
+        | Opcode::Bnot => Cost(2),
+        // Everything else (pure).
+        _ => Cost(3),
+    }
+}
diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs
index ee465def22..2ca59bdb03 100644
--- a/cranelift/codegen/src/egraph/elaborate.rs
+++ b/cranelift/codegen/src/egraph/elaborate.rs
@@ -1,47 +1,78 @@
 //! Elaboration phase: lowers EGraph back to sequences of operations
 //! in CFG nodes.
 
+use super::cost::{pure_op_cost, Cost};
 use super::domtree::DomTreeWithChildren;
-use super::node::{op_cost, Cost, Node, NodeCtx};
-use super::Analysis;
 use super::Stats;
 use crate::dominator_tree::DominatorTree;
 use crate::fx::FxHashSet;
-use crate::ir::{Block, Function, Inst, Opcode, RelSourceLoc, Type, Value, ValueList};
-use crate::loop_analysis::LoopAnalysis;
+use crate::ir::ValueDef;
+use crate::ir::{Block, Function, Inst, Value};
+use crate::loop_analysis::{Loop, LoopAnalysis, LoopLevel};
 use crate::scoped_hash_map::ScopedHashMap;
 use crate::trace;
+use crate::unionfind::UnionFind;
 use alloc::vec::Vec;
-use cranelift_egraph::{EGraph, Id, Language, NodeKey};
-use cranelift_entity::{packed_option::PackedOption, SecondaryMap};
+use cranelift_entity::{packed_option::ReservedValue, SecondaryMap};
 use smallvec::{smallvec, SmallVec};
 use std::ops::Add;
 
-type LoopDepth = u32;
-
 pub(crate) struct Elaborator<'a> {
     func: &'a mut Function,
     domtree: &'a DominatorTree,
+    domtree_children: &'a DomTreeWithChildren,
     loop_analysis: &'a LoopAnalysis,
-    node_ctx: &'a NodeCtx,
-    egraph: &'a EGraph<NodeCtx, Analysis>,
-    id_to_value: ScopedHashMap<Id, IdValue>,
-    id_to_best_cost_and_node: SecondaryMap<Id, (Cost, Id)>,
+    eclasses: &'a mut UnionFind<Value>,
+    /// Map from Value that is produced by a pure Inst (and was thus
+    /// not in the side-effecting skeleton) to the value produced by
+    /// an elaborated inst (placed in the layout) to whose results we
+    /// refer in the final code.
+    ///
+    /// The first time we use some result of an instruction during
+    /// elaboration, we can place it and insert an identity map (inst
+    /// results to that same inst's results) in this scoped
+    /// map. Within that block and its dom-tree children, that mapping
+    /// is visible and we can continue to use it. This allows us to
+    /// avoid cloning the instruction. However, if we pop that scope
+    /// and use it somewhere else as well, we will need to
+    /// duplicate. We detect this case by checking, when a value that
+    /// we want is not present in this map, whether the producing inst
+    /// is already placed in the Layout. If so, we duplicate, and
+    /// insert non-identity mappings from the original inst's results
+    /// to the cloned inst's results.
+    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
+    /// Map from Value to the best (lowest-cost) Value in its eclass
+    /// (tree of union value-nodes).
+    value_to_best_value: SecondaryMap<Value, (Cost, Value)>,
     /// Stack of blocks and loops in current elaboration path.
     loop_stack: SmallVec<[LoopStackEntry; 8]>,
-    cur_block: Option<Block>,
-    first_branch: SecondaryMap<Block, PackedOption<Inst>>,
-    remat_ids: &'a FxHashSet<Id>,
+    /// The current block into which we are elaborating.
+    cur_block: Block,
+    /// Values that opt rules have indicated should be rematerialized
+    /// in every block they are used (e.g., immediates or other
+    /// "cheap-to-compute" ops).
+    remat_values: &'a FxHashSet<Value>,
     /// Explicitly-unrolled value elaboration stack.
     elab_stack: Vec<ElabStackEntry>,
-    elab_result_stack: Vec<IdValue>,
+    /// Results from the elab stack.
+    elab_result_stack: Vec<ElaboratedValue>,
     /// Explicitly-unrolled block elaboration stack.
     block_stack: Vec<BlockStackEntry>,
+    /// Stats for various events during egraph processing, to help
+    /// with optimization of this infrastructure.
     stats: &'a mut Stats,
 }
 
+#[derive(Clone, Copy, Debug)]
+struct ElaboratedValue {
+    in_block: Block,
+    value: Value,
+}
+
 #[derive(Clone, Debug)]
 struct LoopStackEntry {
+    /// The loop identifier.
+    lp: Loop,
     /// The hoist point: a block that immediately dominates this
     /// loop. May not be an immediate predecessor, but will be a valid
     /// point to place all loop-invariant ops: they must depend only
@@ -54,22 +85,20 @@ struct LoopStackEntry {
 
 #[derive(Clone, Debug)]
 enum ElabStackEntry {
-    /// Next action is to resolve this id into a node and elaborate
-    /// args.
-    Start { id: Id },
+    /// Next action is to resolve this value into an elaborated inst
+    /// (placed into the layout) that produces the value, and
+    /// recursively elaborate the insts that produce its args.
+    ///
+    /// Any inserted ops should be inserted before `before`, which is
+    /// the instruction demanding this value.
+    Start { value: Value, before: Inst },
     /// Args have been pushed; waiting for results.
-    PendingNode {
-        canonical: Id,
-        node_key: NodeKey,
-        remat: bool,
+    PendingInst {
+        inst: Inst,
+        result_idx: usize,
         num_args: usize,
-    },
-    /// Waiting for a result to return one projected value of a
-    /// multi-value result.
-    PendingProjection {
-        canonical: Id,
-        index: usize,
-        ty: Type,
+        remat: bool,
+        before: Inst,
     },
 }
 
@@ -79,56 +108,31 @@ enum BlockStackEntry {
     Pop,
 }
 
-#[derive(Clone, Debug)]
-enum IdValue {
-    /// A single value.
-    Value {
-        depth: LoopDepth,
-        block: Block,
-        value: Value,
-    },
-    /// Multiple results; indices in `node_args`.
-    Values {
-        depth: LoopDepth,
-        block: Block,
-        values: ValueList,
-    },
-}
-
-impl IdValue {
-    fn block(&self) -> Block {
-        match self {
-            IdValue::Value { block, .. } | IdValue::Values { block, .. } => *block,
-        }
-    }
-}
-
 impl<'a> Elaborator<'a> {
     pub(crate) fn new(
         func: &'a mut Function,
         domtree: &'a DominatorTree,
+        domtree_children: &'a DomTreeWithChildren,
         loop_analysis: &'a LoopAnalysis,
-        egraph: &'a EGraph<NodeCtx, Analysis>,
-        node_ctx: &'a NodeCtx,
-        remat_ids: &'a FxHashSet<Id>,
+        remat_values: &'a FxHashSet<Value>,
+        eclasses: &'a mut UnionFind<Value>,
         stats: &'a mut Stats,
     ) -> Self {
-        let num_blocks = func.dfg.num_blocks();
-        let mut id_to_best_cost_and_node =
-            SecondaryMap::with_default((Cost::infinity(), Id::invalid()));
-        id_to_best_cost_and_node.resize(egraph.classes.len());
+        let num_values = func.dfg.num_values();
+        let mut value_to_best_value =
+            SecondaryMap::with_default((Cost::infinity(), Value::reserved_value()));
+        value_to_best_value.resize(num_values);
         Self {
             func,
             domtree,
+            domtree_children,
             loop_analysis,
-            egraph,
-            node_ctx,
-            id_to_value: ScopedHashMap::with_capacity(egraph.classes.len()),
-            id_to_best_cost_and_node,
+            eclasses,
+            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
+            value_to_best_value,
             loop_stack: smallvec![],
-            cur_block: None,
-            first_branch: SecondaryMap::with_capacity(num_blocks),
-            remat_ids,
+            cur_block: Block::reserved_value(),
+            remat_values,
             elab_stack: vec![],
             elab_result_stack: vec![],
             block_stack: vec![],
@@ -136,19 +140,23 @@ impl<'a> Elaborator<'a> {
         }
     }
 
-    fn cur_loop_depth(&self) -> LoopDepth {
-        self.loop_stack.len() as LoopDepth
-    }
-
-    fn start_block(&mut self, idom: Option<Block>, block: Block, block_params: &[(Id, Type)]) {
+    fn start_block(&mut self, idom: Option<Block>, block: Block) {
         trace!(
-            "start_block: block {:?} with idom {:?} at loop depth {} scope depth {}",
+            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
             block,
             idom,
-            self.cur_loop_depth(),
-            self.id_to_value.depth()
+            self.loop_stack.len(),
+            self.value_to_elaborated_value.depth()
         );
 
+        // Pop any loop levels we're no longer in.
+        while let Some(inner_loop) = self.loop_stack.last() {
+            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
+                break;
+            }
+            self.loop_stack.pop();
+        }
+
         // Note that if the *entry* block is a loop header, we will
         // not make note of the loop here because it will not have an
         // immediate dominator. We must disallow this case because we
@@ -156,14 +164,15 @@ impl<'a> Elaborator<'a> {
         // `LoopAnalysis` will otherwise still make note of this loop
         // and loop depths will not match.
         if let Some(idom) = idom {
-            if self.loop_analysis.is_loop_header(block).is_some() {
+            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
                 self.loop_stack.push(LoopStackEntry {
+                    lp,
                     // Any code hoisted out of this loop will have code
                     // placed in `idom`, and will have def mappings
                     // inserted in to the scoped hashmap at that block's
                     // level.
                     hoist_block: idom,
-                    scope_depth: (self.id_to_value.depth() - 1) as u32,
+                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
                 });
                 trace!(
                     " -> loop header, pushing; depth now {}",
@@ -177,391 +186,457 @@ impl<'a> Elaborator<'a> {
             );
         }
 
-        self.cur_block = Some(block);
-        for &(id, ty) in block_params {
-            let value = self.func.dfg.append_block_param(block, ty);
-            trace!(" -> block param id {:?} value {:?}", id, value);
-            self.id_to_value.insert_if_absent(
-                id,
-                IdValue::Value {
-                    depth: self.cur_loop_depth(),
-                    block,
-                    value,
-                },
-            );
-        }
+        trace!("block {}: loop stack is {:?}", block, self.loop_stack);
+
+        self.cur_block = block;
     }
 
-    fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList {
-        let (instdata, result_ty, arity) = match node {
-            Node::Pure { op, ty, arity, .. } | Node::Inst { op, ty, arity, .. } => (
-                op.with_args(args, &mut self.func.dfg.value_lists),
-                *ty,
-                *arity,
-            ),
-            Node::Load { op, ty, .. } => {
-                (op.with_args(args, &mut self.func.dfg.value_lists), *ty, 1)
-            }
-            _ => panic!("Cannot `add_node()` on block param or projection"),
-        };
-        let srcloc = match node {
-            Node::Inst { srcloc, .. } | Node::Load { srcloc, .. } => *srcloc,
-            _ => RelSourceLoc::default(),
-        };
-        let opcode = instdata.opcode();
-        // Is this instruction either an actual terminator (an
-        // instruction that must end the block), or at least in the
-        // group of branches at the end (including conditional
-        // branches that may be followed by an actual terminator)? We
-        // call this the "terminator group", and we record the first
-        // inst in this group (`first_branch` below) so that we do not
-        // insert instructions needed only by args of later
-        // instructions in the terminator group in the middle of the
-        // terminator group.
-        //
-        // E.g., for the original sequence
-        //   v1 = op ...
-        //   brnz vCond, block1
-        //   jump block2(v1)
-        //
-        // elaboration would naively produce
-        //
-        //   brnz vCond, block1
-        //   v1 = op ...
-        //   jump block2(v1)
-        //
-        // but we use the `first_branch` mechanism below to ensure
-        // that once we've emitted at least one branch, all other
-        // elaborated insts have to go before that. So we emit brnz
-        // first, then as we elaborate the jump, we find we need the
-        // `op`; we `insert_inst` it *before* the brnz (which is the
-        // `first_branch`).
-        let is_terminator_group_inst =
-            opcode.is_branch() || opcode.is_return() || opcode == Opcode::Trap;
-        let inst = self.func.dfg.make_inst(instdata);
-        self.func.srclocs[inst] = srcloc;
-
-        if arity == 1 {
-            self.func.dfg.append_result(inst, result_ty);
-        } else {
-            for _ in 0..arity {
-                self.func.dfg.append_result(inst, crate::ir::types::INVALID);
-            }
-        }
-
-        if is_terminator_group_inst {
-            self.func.layout.append_inst(inst, to_block);
-            if self.first_branch[to_block].is_none() {
-                self.first_branch[to_block] = Some(inst).into();
-            }
-        } else if let Some(branch) = self.first_branch[to_block].into() {
-            self.func.layout.insert_inst(inst, branch);
-        } else {
-            self.func.layout.append_inst(inst, to_block);
-        }
-        self.func.dfg.inst_results_list(inst)
-    }
-
-    fn compute_best_nodes(&mut self) {
-        let best = &mut self.id_to_best_cost_and_node;
-        for (eclass_id, eclass) in &self.egraph.classes {
-            trace!("computing best for eclass {:?}", eclass_id);
-            if let Some(child1) = eclass.child1() {
-                trace!(" -> child {:?}", child1);
-                best[eclass_id] = best[child1];
-            }
-            if let Some(child2) = eclass.child2() {
-                trace!(" -> child {:?}", child2);
-                if best[child2].0 < best[eclass_id].0 {
-                    best[eclass_id] = best[child2];
+    fn compute_best_values(&mut self) {
+        let best = &mut self.value_to_best_value;
+        for (value, def) in self.func.dfg.values_and_defs() {
+            trace!("computing best for value {:?} def {:?}", value, def);
+            match def {
+                ValueDef::Union(x, y) => {
+                    // Pick the best of the two options based on
+                    // min-cost. This works because each element of `best`
+                    // is a `(cost, value)` tuple; `cost` comes first so
+                    // the natural comparison works based on cost, and
+                    // breaks ties based on value number.
+                    trace!(" -> best of {:?} and {:?}", best[x], best[y]);
+                    best[value] = std::cmp::min(best[x], best[y]);
+                    trace!(" -> {:?}", best[value]);
                 }
-            }
-            if let Some(node_key) = eclass.get_node() {
-                let node = node_key.node(&self.egraph.nodes);
-                trace!(" -> eclass {:?}: node {:?}", eclass_id, node);
-                let (cost, id) = match node {
-                    Node::Param { .. }
-                    | Node::Inst { .. }
-                    | Node::Load { .. }
-                    | Node::Result { .. } => (Cost::zero(), eclass_id),
-                    Node::Pure { op, .. } => {
-                        let args_cost = self
-                            .node_ctx
-                            .children(node)
+                ValueDef::Param(_, _) => {
+                    best[value] = (Cost::zero(), value);
+                }
+                // If the Inst is inserted into the layout (which is,
+                // at this point, only the side-effecting skeleton),
+                // then it must be computed and thus we give it zero
+                // cost.
+                ValueDef::Result(inst, _) if self.func.layout.inst_block(inst).is_some() => {
+                    best[value] = (Cost::zero(), value);
+                }
+                ValueDef::Result(inst, _) => {
+                    trace!(" -> value {}: result, computing cost", value);
+                    let inst_data = &self.func.dfg[inst];
+                    let loop_level = self
+                        .func
+                        .layout
+                        .inst_block(inst)
+                        .map(|block| self.loop_analysis.loop_level(block))
+                        .unwrap_or(LoopLevel::root());
+                    // N.B.: at this point we know that the opcode is
+                    // pure, so `pure_op_cost`'s precondition is
+                    // satisfied.
+                    let cost = pure_op_cost(inst_data.opcode()).at_level(loop_level.level())
+                        + self
+                            .func
+                            .dfg
+                            .inst_args(inst)
                             .iter()
-                            .map(|&arg_id| {
-                                trace!("  -> arg {:?}", arg_id);
-                                best[arg_id].0
-                            })
+                            .map(|value| best[*value].0)
                             // Can't use `.sum()` for `Cost` types; do
                             // an explicit reduce instead.
                             .fold(Cost::zero(), Cost::add);
-                        let level = self.egraph.analysis_value(eclass_id).loop_level;
-                        let cost = op_cost(op).at_level(level) + args_cost;
-                        (cost, eclass_id)
-                    }
-                };
-
-                if cost < best[eclass_id].0 {
-                    best[eclass_id] = (cost, id);
+                    best[value] = (cost, value);
                 }
-            }
-            debug_assert_ne!(best[eclass_id].0, Cost::infinity());
-            debug_assert_ne!(best[eclass_id].1, Id::invalid());
-            trace!("best for eclass {:?}: {:?}", eclass_id, best[eclass_id]);
+            };
+            debug_assert_ne!(best[value].0, Cost::infinity());
+            debug_assert_ne!(best[value].1, Value::reserved_value());
+            trace!("best for eclass {:?}: {:?}", value, best[value]);
         }
     }
 
-    fn elaborate_eclass_use(&mut self, id: Id) {
-        self.elab_stack.push(ElabStackEntry::Start { id });
+    /// Elaborate use of an eclass, inserting any needed new
+    /// instructions before the given inst `before`. Should only be
+    /// given values corresponding to results of instructions or
+    /// blockparams.
+    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
+        debug_assert_ne!(value, Value::reserved_value());
+
+        // Kick off the process by requesting this result
+        // value.
+        self.elab_stack
+            .push(ElabStackEntry::Start { value, before });
+
+        // Now run the explicit-stack recursion until we reach
+        // the root.
         self.process_elab_stack();
         debug_assert_eq!(self.elab_result_stack.len(), 1);
-        self.elab_result_stack.clear();
+        self.elab_result_stack.pop().unwrap()
     }
 
     fn process_elab_stack(&mut self) {
         while let Some(entry) = self.elab_stack.last() {
             match entry {
-                &ElabStackEntry::Start { id } => {
+                &ElabStackEntry::Start { value, before } => {
                     // We always replace the Start entry, so pop it now.
                     self.elab_stack.pop();
 
-                    self.stats.elaborate_visit_node += 1;
-                    let canonical = self.egraph.canonical_id(id);
-                    trace!("elaborate: id {}", id);
+                    debug_assert_ne!(value, Value::reserved_value());
+                    let value = self.func.dfg.resolve_aliases(value);
 
-                    let remat = if let Some(val) = self.id_to_value.get(&canonical) {
-                        // Look at the defined block, and determine whether this
-                        // node kind allows rematerialization if the value comes
-                        // from another block. If so, ignore the hit and recompute
-                        // below.
-                        let remat = val.block() != self.cur_block.unwrap()
-                            && self.remat_ids.contains(&canonical);
+                    self.stats.elaborate_visit_node += 1;
+                    let canonical_value = self.eclasses.find(value);
+                    debug_assert_ne!(canonical_value, Value::reserved_value());
+                    trace!(
+                        "elaborate: value {} canonical {} before {}",
+                        value,
+                        canonical_value,
+                        before
+                    );
+
+                    let remat = if let Some(elab_val) =
+                        self.value_to_elaborated_value.get(&canonical_value)
+                    {
+                        // Value is available. Look at the defined
+                        // block, and determine whether this node kind
+                        // allows rematerialization if the value comes
+                        // from another block. If so, ignore the hit
+                        // and recompute below.
+                        let remat = elab_val.in_block != self.cur_block
+                            && self.remat_values.contains(&canonical_value);
                         if !remat {
-                            trace!("elaborate: id {} -> {:?}", id, val);
+                            trace!("elaborate: value {} -> {:?}", value, elab_val);
                             self.stats.elaborate_memoize_hit += 1;
-                            self.elab_result_stack.push(val.clone());
+                            self.elab_result_stack.push(*elab_val);
                             continue;
                         }
-                        trace!("elaborate: id {} -> remat", id);
+                        trace!("elaborate: value {} -> remat", canonical_value);
                         self.stats.elaborate_memoize_miss_remat += 1;
                         // The op is pure at this point, so it is always valid to
                         // remove from this map.
-                        self.id_to_value.remove(&canonical);
+                        self.value_to_elaborated_value.remove(&canonical_value);
                         true
                     } else {
-                        self.remat_ids.contains(&canonical)
+                        // Value not available; but still look up
+                        // whether it's been flagged for remat because
+                        // this affects placement.
+                        let remat = self.remat_values.contains(&canonical_value);
+                        trace!(" -> not present in map; remat = {}", remat);
+                        remat
                     };
                     self.stats.elaborate_memoize_miss += 1;
 
-                    // Get the best option; we use `id` (latest id) here so we
-                    // have a full view of the eclass.
-                    let (_, best_node_eclass) = self.id_to_best_cost_and_node[id];
-                    debug_assert_ne!(best_node_eclass, Id::invalid());
+                    // Get the best option; we use `value` (latest
+                    // value) here so we have a full view of the
+                    // eclass.
+                    trace!("looking up best value for {}", value);
+                    let (_, best_value) = self.value_to_best_value[value];
+                    debug_assert_ne!(best_value, Value::reserved_value());
+                    trace!("elaborate: value {} -> best {}", value, best_value,);
+
+                    // Now resolve the value to its definition to see
+                    // how we can compute it.
+                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
+                        ValueDef::Result(inst, result_idx) => {
+                            trace!(
+                                " -> value {} is result {} of {}",
+                                best_value,
+                                result_idx,
+                                inst
+                            );
+                            (inst, result_idx)
+                        }
+                        ValueDef::Param(_, _) => {
+                            // We don't need to do anything to compute
+                            // this value; just push its result on the
+                            // result stack (blockparams are already
+                            // available).
+                            trace!(" -> value {} is a blockparam", best_value);
+                            self.elab_result_stack.push(ElaboratedValue {
+                                in_block: self.cur_block,
+                                value: best_value,
+                            });
+                            continue;
+                        }
+                        ValueDef::Union(_, _) => {
+                            panic!("Should never have a Union value as the best value");
+                        }
+                    };
 
                     trace!(
-                        "elaborate: id {} -> best {} -> eclass node {:?}",
-                        id,
-                        best_node_eclass,
-                        self.egraph.classes[best_node_eclass]
+                        " -> result {} of inst {:?}",
+                        result_idx,
+                        self.func.dfg[inst]
                     );
-                    let node_key = self.egraph.classes[best_node_eclass].get_node().unwrap();
-                    let node = node_key.node(&self.egraph.nodes);
-                    trace!(" -> enode {:?}", node);
 
-                    // Is the node a block param? We should never get here if so
-                    // (they are inserted when first visiting the block).
-                    if matches!(node, Node::Param { .. }) {
-                        unreachable!("Param nodes should already be inserted");
-                    }
-
-                    // Is the node a result projection? If so, resolve
-                    // the value we are projecting a part of, then
-                    // eventually return here (saving state with a
-                    // PendingProjection).
-                    if let Node::Result {
-                        value, result, ty, ..
-                    } = node
-                    {
-                        trace!(" -> result; pushing arg value {}", value);
-                        self.elab_stack.push(ElabStackEntry::PendingProjection {
-                            index: *result,
-                            canonical,
-                            ty: *ty,
-                        });
-                        self.elab_stack.push(ElabStackEntry::Start { id: *value });
-                        continue;
-                    }
-
-                    // We're going to need to emit this
-                    // operator. First, enqueue all args to be
+                    // We're going to need to use this instruction
+                    // result, placing the instruction into the
+                    // layout. First, enqueue all args to be
                     // elaborated. Push state to receive the results
-                    // and later elab this node.
-                    let num_args = self.node_ctx.children(&node).len();
-                    self.elab_stack.push(ElabStackEntry::PendingNode {
-                        canonical,
-                        node_key,
-                        remat,
+                    // and later elab this inst.
+                    let args = self.func.dfg.inst_args(inst);
+                    let num_args = args.len();
+                    self.elab_stack.push(ElabStackEntry::PendingInst {
+                        inst,
+                        result_idx,
                         num_args,
+                        remat,
+                        before,
                     });
                     // Push args in reverse order so we process the
                     // first arg first.
-                    for &arg_id in self.node_ctx.children(&node).iter().rev() {
-                        self.elab_stack.push(ElabStackEntry::Start { id: arg_id });
+                    for &arg in args.iter().rev() {
+                        debug_assert_ne!(arg, Value::reserved_value());
+                        self.elab_stack
+                            .push(ElabStackEntry::Start { value: arg, before });
                     }
                 }
 
-                &ElabStackEntry::PendingNode {
-                    canonical,
-                    node_key,
-                    remat,
+                &ElabStackEntry::PendingInst {
+                    inst,
+                    result_idx,
                     num_args,
+                    remat,
+                    before,
                 } => {
                     self.elab_stack.pop();
 
-                    let node = node_key.node(&self.egraph.nodes);
-
-                    // We should have all args resolved at this point.
-                    let arg_idx = self.elab_result_stack.len() - num_args;
-                    let args = &self.elab_result_stack[arg_idx..];
-
-                    // Gather the individual output-CLIF `Value`s.
-                    let arg_values: SmallVec<[Value; 8]> = args
-                        .iter()
-                        .map(|idvalue| match idvalue {
-                            IdValue::Value { value, .. } => *value,
-                            IdValue::Values { .. } => {
-                                panic!("enode depends directly on multi-value result")
-                            }
-                        })
-                        .collect();
-
-                    // Compute max loop depth.
-                    let max_loop_depth = args
-                        .iter()
-                        .map(|idvalue| match idvalue {
-                            IdValue::Value { depth, .. } => *depth,
-                            IdValue::Values { .. } => unreachable!(),
-                        })
-                        .max()
-                        .unwrap_or(0);
-
-                    // Remove args from result stack.
-                    self.elab_result_stack.truncate(arg_idx);
-
-                    // Determine the location at which we emit it. This is the
-                    // current block *unless* we hoist above a loop when all args
-                    // are loop-invariant (and this op is pure).
-                    let (loop_depth, scope_depth, block) = if node.is_non_pure() {
-                        // Non-pure op: always at the current location.
-                        (
-                            self.cur_loop_depth(),
-                            self.id_to_value.depth(),
-                            self.cur_block.unwrap(),
-                        )
-                    } else if max_loop_depth == self.cur_loop_depth() || remat {
-                        // Pure op, but depends on some value at the current loop
-                        // depth, or remat forces it here: as above.
-                        (
-                            self.cur_loop_depth(),
-                            self.id_to_value.depth(),
-                            self.cur_block.unwrap(),
-                        )
-                    } else {
-                        // Pure op, and does not depend on any args at current
-                        // loop depth: hoist out of loop.
-                        self.stats.elaborate_licm_hoist += 1;
-                        let data = &self.loop_stack[max_loop_depth as usize];
-                        (max_loop_depth, data.scope_depth as usize, data.hoist_block)
-                    };
-                    // Loop scopes are a subset of all scopes.
-                    debug_assert!(scope_depth >= loop_depth as usize);
-
-                    // This is an actual operation; emit the node in sequence now.
-                    let results = self.add_node(node, &arg_values[..], block);
-                    let results_slice = results.as_slice(&self.func.dfg.value_lists);
-
-                    // Build the result and memoize in the id-to-value map.
-                    let result = if results_slice.len() == 1 {
-                        IdValue::Value {
-                            depth: loop_depth,
-                            block,
-                            value: results_slice[0],
-                        }
-                    } else {
-                        IdValue::Values {
-                            depth: loop_depth,
-                            block,
-                            values: results,
-                        }
-                    };
-
-                    self.id_to_value.insert_if_absent_with_depth(
-                        canonical,
-                        result.clone(),
-                        scope_depth,
+                    trace!(
+                        "PendingInst: {} result {} args {} remat {} before {}",
+                        inst,
+                        result_idx,
+                        num_args,
+                        remat,
+                        before
                     );
 
-                    // Push onto the elab-results stack.
-                    self.elab_result_stack.push(result)
-                }
-                &ElabStackEntry::PendingProjection {
-                    ty,
-                    index,
-                    canonical,
-                } => {
-                    self.elab_stack.pop();
+                    // We should have all args resolved at this
+                    // point. Borrow them in place; they are popped
+                    // off the stack once consumed below.
+                    let arg_idx = self.elab_result_stack.len() - num_args;
+                    let arg_values = &self.elab_result_stack[arg_idx..];
 
-                    // Grab the input from the elab-result stack.
-                    let value = self.elab_result_stack.pop().expect("Should have result");
+                    // Compute the loop hoist level: the max of the args' hoist levels.
+                    let loop_hoist_level = arg_values
+                        .iter()
+                        .map(|&value| {
+                            // Find the outermost loop level at which
+                            // the value's defining block *is not* a
+                            // member. This is the loop-nest level
+                            // whose hoist-block we hoist to.
+                            let hoist_level = self
+                                .loop_stack
+                                .iter()
+                                .position(|loop_entry| {
+                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
+                                })
+                                .unwrap_or(self.loop_stack.len());
+                            trace!(
+                                " -> arg: elab_value {:?} hoist level {:?}",
+                                value,
+                                hoist_level
+                            );
+                            hoist_level
+                        })
+                        .max()
+                        .unwrap_or(self.loop_stack.len());
+                    trace!(
+                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
+                        loop_hoist_level,
+                        self.loop_stack.len(),
+                        self.loop_stack,
+                    );
 
-                    let (depth, block, values) = match value {
-                        IdValue::Values {
-                            depth,
-                            block,
-                            values,
-                            ..
-                        } => (depth, block, values),
-                        IdValue::Value { .. } => {
-                            unreachable!("Projection nodes should not be used on single results");
+                    // We know that this is a pure inst, because
+                    // non-pure roots have already been placed in the
+                    // value-to-elab'd-value map and are never subject
+                    // to remat, so they will not reach this stage of
+                    // processing.
+                    //
+                    // We now must determine the location at which we
+                    // place the instruction. This is the current
+                    // block *unless* we hoist above a loop when all
+                    // args are loop-invariant (and this op is pure).
+                    let (scope_depth, before, insert_block) =
+                        if loop_hoist_level == self.loop_stack.len() || remat {
+                            // Depends on some value at the current
+                            // loop depth, or remat forces it here:
+                            // place it at the current location.
+                            (
+                                self.value_to_elaborated_value.depth(),
+                                before,
+                                self.func.layout.inst_block(before).unwrap(),
+                            )
+                        } else {
+                            // Does not depend on any args at current
+                            // loop depth: hoist out of loop.
+                            self.stats.elaborate_licm_hoist += 1;
+                            let data = &self.loop_stack[loop_hoist_level];
+                            // `data.hoist_block` should dominate `before`'s block.
+                            let before_block = self.func.layout.inst_block(before).unwrap();
+                            debug_assert!(self.domtree.dominates(
+                                data.hoist_block,
+                                before_block,
+                                &self.func.layout
+                            ));
+                            // Determine the instruction at which we
+                            // insert in `data.hoist_block`.
+                            let before = self
+                                .func
+                                .layout
+                                .canonical_branch_inst(&self.func.dfg, data.hoist_block)
+                                .unwrap();
+                            (data.scope_depth as usize, before, data.hoist_block)
+                        };
+
+                    trace!(
+                        " -> decided to place: before {} insert_block {}",
+                        before,
+                        insert_block
+                    );
+
+                    // Now we need to place `inst` at the computed
+                    // location (just before `before`). Note that
+                    // `inst` may already have been placed somewhere
+                    // else, because a pure node may be elaborated at
+                    // more than one place. In this case, we need to
+                    // duplicate the instruction (and return the
+                    // `Value`s for that duplicated instance
+                    // instead).
+                    trace!("need inst {} before {}", inst, before);
+                    let inst = if self.func.layout.inst_block(inst).is_some() {
+                        // Clone the inst!
+                        let new_inst = self.func.dfg.clone_inst(inst);
+                        trace!(
+                            " -> inst {} already has a location; cloned to {}",
+                            inst,
+                            new_inst
+                        );
+                        // Create mappings in the
+                        // value-to-elab'd-value map from original
+                        // results to cloned results.
+                        for (&result, &new_result) in self
+                            .func
+                            .dfg
+                            .inst_results(inst)
+                            .iter()
+                            .zip(self.func.dfg.inst_results(new_inst).iter())
+                        {
+                            let elab_value = ElaboratedValue {
+                                value: new_result,
+                                in_block: insert_block,
+                            };
+                            self.value_to_elaborated_value.insert_if_absent_with_depth(
+                                result,
+                                elab_value,
+                                scope_depth,
+                            );
+
+                            self.eclasses.add(new_result);
+                            self.eclasses.union(result, new_result);
+                            self.value_to_best_value[new_result] = self.value_to_best_value[result];
+
+                            trace!(
+                                " -> cloned inst has new result {} for orig {}",
+                                new_result,
+                                result
+                            );
                         }
+                        new_inst
+                    } else {
+                        trace!(" -> no location; using original inst");
+                        // Create identity mappings from result values
+                        // to themselves in this scope, since we're
+                        // using the original inst.
+                        for &result in self.func.dfg.inst_results(inst) {
+                            let elab_value = ElaboratedValue {
+                                value: result,
+                                in_block: insert_block,
+                            };
+                            self.value_to_elaborated_value.insert_if_absent_with_depth(
+                                result,
+                                elab_value,
+                                scope_depth,
+                            );
+                            trace!(" -> inserting identity mapping for {}", result);
+                        }
+                        inst
                     };
-                    let values = values.as_slice(&self.func.dfg.value_lists);
-                    let value = values[index];
-                    self.func.dfg.fill_in_value_type(value, ty);
-                    let value = IdValue::Value {
-                        depth,
-                        block,
-                        value,
-                    };
-                    self.id_to_value.insert_if_absent(canonical, value.clone());
+                    // Place the inst just before `before`.
+                    self.func.layout.insert_inst(inst, before);
 
-                    self.elab_result_stack.push(value);
+                    // Update the inst's arguments.
+                    let args_dest = self.func.dfg.inst_args_mut(inst);
+                    for (dest, val) in args_dest.iter_mut().zip(arg_values.iter()) {
+                        *dest = val.value;
+                    }
+
+                    // Now that we've consumed the arg values, pop
+                    // them off the stack.
+                    self.elab_result_stack.truncate(arg_idx);
+
+                    // Push the requested result index of the
+                    // instruction onto the elab-results stack.
+                    self.elab_result_stack.push(ElaboratedValue {
+                        in_block: insert_block,
+                        value: self.func.dfg.inst_results(inst)[result_idx],
+                    });
                 }
             }
         }
     }
 
-    fn elaborate_block<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
-        &mut self,
-        idom: Option<Block>,
-        block: Block,
-        block_params_fn: &PF,
-        block_side_effects_fn: &SEF,
-    ) {
-        let blockparam_ids_tys = (block_params_fn)(block);
-        self.start_block(idom, block, blockparam_ids_tys);
-        for &id in (block_side_effects_fn)(block) {
-            self.elaborate_eclass_use(id);
+    fn elaborate_block(&mut self, idom: Option<Block>, block: Block) {
+        trace!("elaborate_block: block {}", block);
+        self.start_block(idom, block);
+
+        // Elaborate every instruction of `block`: walk the
+        // side-effecting skeleton via the linked list in Layout.
+        // Elaborated insts are inserted *before* `inst`, so its
+        // next-link always continues the iteration correctly.
+        let mut next_inst = self.func.layout.first_inst(block);
+        let mut first_branch = None;
+        while let Some(inst) = next_inst {
+            trace!(
+                "elaborating inst {} with results {:?}",
+                inst,
+                self.func.dfg.inst_results(inst)
+            );
+            // Record the first branch we see in the block; all
+            // elaboration for args of *any* branch must be inserted
+            // before the *first* branch, because the branch group
+            // must remain contiguous at the end of the block.
+            if first_branch.is_none() && self.func.dfg[inst].opcode().is_branch() {
+                first_branch = Some(inst);
+            }
+
+            // Determine where elaboration inserts insts.
+            let before = first_branch.unwrap_or(inst);
+            trace!(" -> inserting before {}", before);
+
+            // For each arg of the inst, elaborate its value.
+            for i in 0..self.func.dfg.inst_args(inst).len() {
+                // Don't borrow across the below.
+                let arg = self.func.dfg.inst_args(inst)[i];
+                trace!(" -> arg {}", arg);
+                // Elaborate the arg, placing any newly-inserted insts
+                // before `before`. Get the updated value, which may
+                // be different than the original.
+                let arg = self.elaborate_eclass_use(arg, before);
+                trace!("   -> rewrote arg to {:?}", arg);
+                self.func.dfg.inst_args_mut(inst)[i] = arg.value;
+            }
+
+            // We need to put the results of this instruction in the
+            // map now.
+            for &result in self.func.dfg.inst_results(inst) {
+                trace!(" -> result {}", result);
+                self.value_to_elaborated_value.insert_if_absent(
+                    result,
+                    ElaboratedValue {
+                        in_block: block,
+                        value: result,
+                    },
+                );
+            }
+
+            next_inst = self.func.layout.next_inst(inst);
         }
     }
 
-    fn elaborate_domtree<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
-        &mut self,
-        block_params_fn: &PF,
-        block_side_effects_fn: &SEF,
-        domtree: &DomTreeWithChildren,
-    ) {
+    fn elaborate_domtree(&mut self, domtree: &DomTreeWithChildren) {
         let root = domtree.root();
         self.block_stack.push(BlockStackEntry::Elaborate {
             block: root,
@@ -571,9 +646,9 @@ impl<'a> Elaborator<'a> {
             match top {
                 BlockStackEntry::Elaborate { block, idom } => {
                     self.block_stack.push(BlockStackEntry::Pop);
-                    self.id_to_value.increment_depth();
+                    self.value_to_elaborated_value.increment_depth();
 
-                    self.elaborate_block(idom, block, block_params_fn, block_side_effects_fn);
+                    self.elaborate_block(idom, block);
 
                     // Push children. We are doing a preorder
                     // traversal so we do this after processing this
@@ -592,39 +667,17 @@ impl<'a> Elaborator<'a> {
                     self.block_stack[block_stack_end..].reverse();
                 }
                 BlockStackEntry::Pop => {
-                    self.id_to_value.decrement_depth();
-                    if let Some(innermost_loop) = self.loop_stack.last() {
-                        if innermost_loop.scope_depth as usize == self.id_to_value.depth() {
-                            self.loop_stack.pop();
-                        }
-                    }
+                    self.value_to_elaborated_value.decrement_depth();
                 }
             }
         }
     }
 
-    fn clear_func_body(&mut self) {
-        // Clear all instructions and args/results from the DFG. We
-        // rebuild them entirely during elaboration. (TODO: reuse the
-        // existing inst for the *first* copy of a given node.)
-        self.func.dfg.clear_insts();
-        // Clear the instructions in every block, but leave the list
-        // of blocks and their layout unmodified.
-        self.func.layout.clear_insts();
-        self.func.srclocs.clear();
-    }
-
-    pub(crate) fn elaborate<'b, PF: Fn(Block) -> &'b [(Id, Type)], SEF: Fn(Block) -> &'b [Id]>(
-        &mut self,
-        block_params_fn: PF,
-        block_side_effects_fn: SEF,
-    ) {
-        let domtree = DomTreeWithChildren::new(self.func, self.domtree);
+    pub(crate) fn elaborate(&mut self) {
         self.stats.elaborate_func += 1;
         self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
-        self.clear_func_body();
-        self.compute_best_nodes();
-        self.elaborate_domtree(&block_params_fn, &block_side_effects_fn, &domtree);
+        self.compute_best_values();
+        self.elaborate_domtree(&self.domtree_children);
         self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
     }
 }
diff --git a/cranelift/codegen/src/egraph/node.rs b/cranelift/codegen/src/egraph/node.rs
deleted file mode 100644
index 01d8e4128c..0000000000
--- a/cranelift/codegen/src/egraph/node.rs
+++ /dev/null
@@ -1,366 +0,0 @@
-//! Node definition for EGraph representation.
-
-use super::PackedMemoryState;
-use crate::ir::{Block, DataFlowGraph, InstructionImms, Opcode, RelSourceLoc, Type};
-use crate::loop_analysis::LoopLevel;
-use cranelift_egraph::{CtxEq, CtxHash, Id, Language, UnionFind};
-use cranelift_entity::{EntityList, ListPool};
-use std::hash::{Hash, Hasher};
-
-#[derive(Debug)]
-pub enum Node {
-    /// A blockparam. Effectively an input/root; does not refer to
-    /// predecessors' branch arguments, because this would create
-    /// cycles.
-    Param {
-        /// CLIF block this param comes from.
-        block: Block,
-        /// Index of blockparam within block.
-        index: u32,
-        /// Type of the value.
-        ty: Type,
-        /// The loop level of this Param.
-        loop_level: LoopLevel,
-    },
-    /// A CLIF instruction that is pure (has no side-effects). Not
-    /// tied to any location; we will compute a set of locations at
-    /// which to compute this node during lowering back out of the
-    /// egraph.
-    Pure {
-        /// The instruction data, without SSA values.
-        op: InstructionImms,
-        /// eclass arguments to the operator.
-        args: EntityList<Id>,
-        /// Type of result, if one.
-        ty: Type,
-        /// Number of results.
-        arity: u16,
-    },
-    /// A CLIF instruction that has side-effects or is otherwise not
-    /// representable by `Pure`.
-    Inst {
-        /// The instruction data, without SSA values.
-        op: InstructionImms,
-        /// eclass arguments to the operator.
-        args: EntityList<Id>,
-        /// Type of result, if one.
-        ty: Type,
-        /// Number of results.
-        arity: u16,
-        /// The source location to preserve.
-        srcloc: RelSourceLoc,
-        /// The loop level of this Inst.
-        loop_level: LoopLevel,
-    },
-    /// A projection of one result of an `Inst` or `Pure`.
-    Result {
-        /// `Inst` or `Pure` node.
-        value: Id,
-        /// Index of the result we want.
-        result: usize,
-        /// Type of the value.
-        ty: Type,
-    },
-
-    /// A load instruction. Nominally a side-effecting `Inst` (and
-    /// included in the list of side-effecting roots so it will always
-    /// be elaborated), but represented as a distinct kind of node so
-    /// that we can leverage deduplication to do
-    /// redundant-load-elimination for free (and make store-to-load
-    /// forwarding much easier).
-    Load {
-        // -- identity depends on:
-        /// The original load operation. Must have one argument, the
-        /// address.
-        op: InstructionImms,
-        /// The type of the load result.
-        ty: Type,
-        /// Address argument. Actual address has an offset, which is
-        /// included in `op` (and thus already considered as part of
-        /// the key).
-        addr: Id,
-        /// The abstract memory state that this load accesses.
-        mem_state: PackedMemoryState,
-
-        // -- not included in dedup key:
-        /// Source location, for traps. Not included in Eq/Hash.
-        srcloc: RelSourceLoc,
-    },
-}
-
-impl Node {
-    pub(crate) fn is_non_pure(&self) -> bool {
-        match self {
-            Node::Inst { .. } | Node::Load { .. } => true,
-            _ => false,
-        }
-    }
-}
-
-/// Shared pools for type and id lists in nodes.
-pub struct NodeCtx {
-    /// Arena for arg eclass-ID lists.
-    pub args: ListPool<Id>,
-}
-
-impl NodeCtx {
-    pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self {
-        let n_args = dfg.value_lists.capacity();
-        Self {
-            args: ListPool::with_capacity(n_args),
-        }
-    }
-}
-
-impl NodeCtx {
-    fn ids_eq(&self, a: &EntityList<Id>, b: &EntityList<Id>, uf: &mut UnionFind) -> bool {
-        let a = a.as_slice(&self.args);
-        let b = b.as_slice(&self.args);
-        a.len() == b.len() && a.iter().zip(b.iter()).all(|(&a, &b)| uf.equiv_id_mut(a, b))
-    }
-
-    fn hash_ids<H: Hasher>(&self, a: &EntityList<Id>, hash: &mut H, uf: &mut UnionFind) {
-        let a = a.as_slice(&self.args);
-        for &id in a {
-            uf.hash_id_mut(hash, id);
-        }
-    }
-}
-
-impl CtxEq<Node, Node> for NodeCtx {
-    fn ctx_eq(&self, a: &Node, b: &Node, uf: &mut UnionFind) -> bool {
-        match (a, b) {
-            (
-                &Node::Param {
-                    block,
-                    index,
-                    ty,
-                    loop_level: _,
-                },
-                &Node::Param {
-                    block: other_block,
-                    index: other_index,
-                    ty: other_ty,
-                    loop_level: _,
-                },
-            ) => block == other_block && index == other_index && ty == other_ty,
-            (
-                &Node::Result { value, result, ty },
-                &Node::Result {
-                    value: other_value,
-                    result: other_result,
-                    ty: other_ty,
-                },
-            ) => uf.equiv_id_mut(value, other_value) && result == other_result && ty == other_ty,
-            (
-                &Node::Pure {
-                    ref op,
-                    ref args,
-                    ty,
-                    arity: _,
-                },
-                &Node::Pure {
-                    op: ref other_op,
-                    args: ref other_args,
-                    ty: other_ty,
-                    arity: _,
-                },
-            ) => *op == *other_op && self.ids_eq(args, other_args, uf) && ty == other_ty,
-            (
-                &Node::Inst { ref args, .. },
-                &Node::Inst {
-                    args: ref other_args,
-                    ..
-                },
-            ) => self.ids_eq(args, other_args, uf),
-            (
-                &Node::Load {
-                    ref op,
-                    ty,
-                    addr,
-                    mem_state,
-                    ..
-                },
-                &Node::Load {
-                    op: ref other_op,
-                    ty: other_ty,
-                    addr: other_addr,
-                    mem_state: other_mem_state,
-                    // Explicitly exclude: `inst` and `srcloc`. We
-                    // want loads to merge if identical in
-                    // opcode/offset, address expression, and last
-                    // store (this does implicit
-                    // redundant-load-elimination.)
-                    //
-                    // Note however that we *do* include `ty` (the
-                    // type) and match on that: we otherwise would
-                    // have no way of disambiguating loads of
-                    // different widths to the same address.
-                    ..
-                },
-            ) => {
-                op == other_op
-                    && ty == other_ty
-                    && uf.equiv_id_mut(addr, other_addr)
-                    && mem_state == other_mem_state
-            }
-            _ => false,
-        }
-    }
-}
-
-impl CtxHash<Node> for NodeCtx {
-    fn ctx_hash(&self, value: &Node, uf: &mut UnionFind) -> u64 {
-        let mut state = crate::fx::FxHasher::default();
-        std::mem::discriminant(value).hash(&mut state);
-        match value {
-            &Node::Param {
-                block,
-                index,
-                ty: _,
-                loop_level: _,
-            } => {
-                block.hash(&mut state);
-                index.hash(&mut state);
-            }
-            &Node::Result {
-                value,
-                result,
-                ty: _,
-            } => {
-                uf.hash_id_mut(&mut state, value);
-                result.hash(&mut state);
-            }
-            &Node::Pure {
-                ref op,
-                ref args,
-                ty,
-                arity: _,
-            } => {
-                op.hash(&mut state);
-                self.hash_ids(args, &mut state, uf);
-                ty.hash(&mut state);
-            }
-            &Node::Inst { ref args, .. } => {
-                self.hash_ids(args, &mut state, uf);
-            }
-            &Node::Load {
-                ref op,
-                ty,
-                addr,
-                mem_state,
-                ..
-            } => {
-                op.hash(&mut state);
-                ty.hash(&mut state);
-                uf.hash_id_mut(&mut state, addr);
-                mem_state.hash(&mut state);
-            }
-        }
-
-        state.finish()
-    }
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct Cost(u32);
-impl Cost {
-    pub(crate) fn at_level(&self, loop_level: LoopLevel) -> Cost {
-        let loop_level = std::cmp::min(2, loop_level.level());
-        let multiplier = 1u32 << ((10 * loop_level) as u32);
-        Cost(self.0.saturating_mul(multiplier)).finite()
-    }
-
-    pub(crate) fn infinity() -> Cost {
-        // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost`
-        // only for heuristics and always saturate so this suffices!)
-        Cost(u32::MAX)
-    }
-
-    pub(crate) fn zero() -> Cost {
-        Cost(0)
-    }
-
-    /// Clamp this cost at a "finite" value. Can be used in
-    /// conjunction with saturating ops to avoid saturating into
-    /// `infinity()`.
-    fn finite(self) -> Cost {
-        Cost(std::cmp::min(u32::MAX - 1, self.0))
-    }
-}
-
-impl std::default::Default for Cost {
-    fn default() -> Cost {
-        Cost::zero()
-    }
-}
-
-impl std::ops::Add<Cost> for Cost {
-    type Output = Cost;
-    fn add(self, other: Cost) -> Cost {
-        Cost(self.0.saturating_add(other.0)).finite()
-    }
-}
-
-pub(crate) fn op_cost(op: &InstructionImms) -> Cost {
-    match op.opcode() {
-        // Constants.
-        Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost(0),
-        // Extends/reduces.
-        Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => {
-            Cost(1)
-        }
-        // "Simple" arithmetic.
-        Opcode::Iadd
-        | Opcode::Isub
-        | Opcode::Band
-        | Opcode::BandNot
-        | Opcode::Bor
-        | Opcode::BorNot
-        | Opcode::Bxor
-        | Opcode::BxorNot
-        | Opcode::Bnot => Cost(2),
-        // Everything else.
-        _ => Cost(3),
-    }
-}
-
-impl Language for NodeCtx {
-    type Node = Node;
-
-    fn children<'a>(&'a self, node: &'a Node) -> &'a [Id] {
-        match node {
-            Node::Param { .. } => &[],
-            Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_slice(&self.args),
-            Node::Load { addr, .. } => std::slice::from_ref(addr),
-            Node::Result { value, .. } => std::slice::from_ref(value),
-        }
-    }
-
-    fn children_mut<'a>(&'a mut self, node: &'a mut Node) -> &'a mut [Id] {
-        match node {
-            Node::Param { .. } => &mut [],
-            Node::Pure { args, .. } | Node::Inst { args, .. } => args.as_mut_slice(&mut self.args),
-            Node::Load { addr, .. } => std::slice::from_mut(addr),
-            Node::Result { value, .. } => std::slice::from_mut(value),
-        }
-    }
-
-    fn needs_dedup(&self, node: &Node) -> bool {
-        match node {
-            Node::Pure { .. } | Node::Load { .. } => true,
-            _ => false,
-        }
-    }
-}
-
-#[cfg(test)]
-mod test {
-    #[test]
-    #[cfg(target_pointer_width = "64")]
-    fn node_size() {
-        use super::*;
-        assert_eq!(std::mem::size_of::<InstructionImms>(), 16);
-        assert_eq!(std::mem::size_of::<Node>(), 32);
-    }
-}
diff --git a/cranelift/codegen/src/egraph/stores.rs b/cranelift/codegen/src/egraph/stores.rs
deleted file mode 100644
index 9746eba159..0000000000
--- a/cranelift/codegen/src/egraph/stores.rs
+++ /dev/null
@@ -1,293 +0,0 @@
-//! Last-store tracking via alias analysis.
-//!
-//! We partition memory state into several *disjoint pieces* of
-//! "abstract state". There are a finite number of such pieces:
-//! currently, we call them "heap", "table", "vmctx", and "other". Any
-//! given address in memory belongs to exactly one disjoint piece.
-//!
-//! One never tracks which piece a concrete address belongs to at
-//! runtime; this is a purely static concept. Instead, all
-//! memory-accessing instructions (loads and stores) are labeled with
-//! one of these four categories in the `MemFlags`. It is forbidden
-//! for a load or store to access memory under one category and a
-//! later load or store to access the same memory under a different
-//! category. This is ensured to be true by construction during
-//! frontend translation into CLIF and during legalization.
-//!
-//! Given that this non-aliasing property is ensured by the producer
-//! of CLIF, we can compute a *may-alias* property: one load or store
-//! may-alias another load or store if both access the same category
-//! of abstract state.
-//!
-//! The "last store" pass helps to compute this aliasing: we perform a
-//! fixpoint analysis to track the last instruction that *might have*
-//! written to a given part of abstract state. We also track the block
-//! containing this store.
-//!
-//! We can't say for sure that the "last store" *did* actually write
-//! that state, but we know for sure that no instruction *later* than
-//! it (up to the current instruction) did. However, we can get a
-//! must-alias property from this: if at a given load or store, we
-//! look backward to the "last store", *AND* we find that it has
-//! exactly the same address expression and value type, then we know
-//! that the current instruction's access *must* be to the same memory
-//! location.
-//!
-//! To get this must-alias property, we leverage the node
-//! hashconsing. We design the Eq/Hash (node identity relation
-//! definition) of the `Node` struct so that all loads with (i) the
-//! same "last store", and (ii) the same address expression, and (iii)
-//! the same opcode-and-offset, will deduplicate (the first will be
-//! computed, and the later ones will use the same value). Furthermore
-//! we have an optimization that rewrites a load into the stored value
-//! of the last store *if* the last store has the same address
-//! expression and constant offset.
-//!
-//! This gives us two optimizations, "redundant load elimination" and
-//! "store-to-load forwarding".
-//!
-//! In theory we could also do *dead-store elimination*, where if a
-//! store overwrites a value earlier written by another store, *and*
-//! if no other load/store to the abstract state category occurred,
-//! *and* no other trapping instruction occurred (at which point we
-//! need an up-to-date memory state because post-trap-termination
-//! memory state can be observed), *and* we can prove the original
-//! store could not have trapped, then we can eliminate the original
-//! store. Because this is so complex, and the conditions for doing it
-//! correctly when post-trap state must be correct likely reduce the
-//! potential benefit, we don't yet do this.
-
-use crate::flowgraph::ControlFlowGraph;
-use crate::fx::{FxHashMap, FxHashSet};
-use crate::inst_predicates::has_memory_fence_semantics;
-use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode};
-use crate::trace;
-use cranelift_entity::{EntityRef, SecondaryMap};
-use smallvec::{smallvec, SmallVec};
-
-/// For a given program point, the vector of last-store instruction
-/// indices for each disjoint category of abstract state.
-#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
-struct LastStores {
-    heap: MemoryState,
-    table: MemoryState,
-    vmctx: MemoryState,
-    other: MemoryState,
-}
-
-/// State of memory seen by a load.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
-pub enum MemoryState {
-    /// State at function entry: nothing is known (but it is one
-    /// consistent value, so two loads from "entry" state at the same
-    /// address will still provide the same result).
-    #[default]
-    Entry,
-    /// State just after a store by the given instruction. The
-    /// instruction is a store from which we can forward.
-    Store(Inst),
-    /// State just before the given instruction. Used for abstract
-    /// value merges at merge-points when we cannot name a single
-    /// producing site.
-    BeforeInst(Inst),
-    /// State just after the given instruction. Used when the
-    /// instruction may update the associated state, but is not a
-    /// store whose value we can cleanly forward. (E.g., perhaps a
-    /// barrier of some sort.)
-    AfterInst(Inst),
-}
-
-/// Memory state index, packed into a u32.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct PackedMemoryState(u32);
-
-impl From<MemoryState> for PackedMemoryState {
-    fn from(state: MemoryState) -> Self {
-        match state {
-            MemoryState::Entry => Self(0),
-            MemoryState::Store(i) => Self(1 | (i.index() as u32) << 2),
-            MemoryState::BeforeInst(i) => Self(2 | (i.index() as u32) << 2),
-            MemoryState::AfterInst(i) => Self(3 | (i.index() as u32) << 2),
-        }
-    }
-}
-
-impl PackedMemoryState {
-    /// Does this memory state refer to a specific store instruction?
-    pub fn as_store(&self) -> Option<Inst> {
-        if self.0 & 3 == 1 {
-            Some(Inst::from_bits(self.0 >> 2))
-        } else {
-            None
-        }
-    }
-}
-
-impl LastStores {
-    fn update(&mut self, func: &Function, inst: Inst) {
-        let opcode = func.dfg[inst].opcode();
-        if has_memory_fence_semantics(opcode) {
-            self.heap = MemoryState::AfterInst(inst);
-            self.table = MemoryState::AfterInst(inst);
-            self.vmctx = MemoryState::AfterInst(inst);
-            self.other = MemoryState::AfterInst(inst);
-        } else if opcode.can_store() {
-            if let Some(memflags) = func.dfg[inst].memflags() {
-                *self.for_flags(memflags) = MemoryState::Store(inst);
-            } else {
-                self.heap = MemoryState::AfterInst(inst);
-                self.table = MemoryState::AfterInst(inst);
-                self.vmctx = MemoryState::AfterInst(inst);
-                self.other = MemoryState::AfterInst(inst);
-            }
-        }
-    }
-
-    fn for_flags(&mut self, memflags: MemFlags) -> &mut MemoryState {
-        if memflags.heap() {
-            &mut self.heap
-        } else if memflags.table() {
-            &mut self.table
-        } else if memflags.vmctx() {
-            &mut self.vmctx
-        } else {
-            &mut self.other
-        }
-    }
-
-    fn meet_from(&mut self, other: &LastStores, loc: Inst) {
-        let meet = |a: MemoryState, b: MemoryState| -> MemoryState {
-            match (a, b) {
-                (a, b) if a == b => a,
-                _ => MemoryState::BeforeInst(loc),
-            }
-        };
-
-        self.heap = meet(self.heap, other.heap);
-        self.table = meet(self.table, other.table);
-        self.vmctx = meet(self.vmctx, other.vmctx);
-        self.other = meet(self.other, other.other);
-    }
-}
-
-/// An alias-analysis pass.
-pub struct AliasAnalysis {
-    /// Last-store instruction (or none) for a given load. Use a hash map
-    /// instead of a `SecondaryMap` because this is sparse.
-    load_mem_state: FxHashMap<Inst, PackedMemoryState>,
-}
-
-impl AliasAnalysis {
-    /// Perform an alias analysis pass.
-    pub fn new(func: &Function, cfg: &ControlFlowGraph) -> AliasAnalysis {
-        log::trace!("alias analysis: input is:\n{:?}", func);
-        let block_input = Self::compute_block_input_states(func, cfg);
-        let load_mem_state = Self::compute_load_last_stores(func, block_input);
-        AliasAnalysis { load_mem_state }
-    }
-
-    fn compute_block_input_states(
-        func: &Function,
-        cfg: &ControlFlowGraph,
-    ) -> SecondaryMap<Block, Option<LastStores>> {
-        let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks());
-        let mut worklist: SmallVec<[Block; 16]> = smallvec![];
-        let mut worklist_set = FxHashSet::default();
-        let entry = func.layout.entry_block().unwrap();
-        worklist.push(entry);
-        worklist_set.insert(entry);
-        block_input[entry] = Some(LastStores::default());
-
-        while let Some(block) = worklist.pop() {
-            worklist_set.remove(&block);
-            let state = block_input[block].clone().unwrap();
-
-            trace!("alias analysis: input to {} is {:?}", block, state);
-
-            let state = func
-                .layout
-                .block_insts(block)
-                .fold(state, |mut state, inst| {
-                    state.update(func, inst);
-                    trace!("after {}: state is {:?}", inst, state);
-                    state
-                });
-
-            for succ in cfg.succ_iter(block) {
-                let succ_first_inst = func.layout.first_inst(succ).unwrap();
-                let succ_state = &mut block_input[succ];
-                let old = succ_state.clone();
-                if let Some(succ_state) = succ_state.as_mut() {
-                    succ_state.meet_from(&state, succ_first_inst);
-                } else {
-                    *succ_state = Some(state);
-                };
-                let updated = *succ_state != old;
-
-                if updated && worklist_set.insert(succ) {
-                    worklist.push(succ);
-                }
-            }
-        }
-
-        block_input
-    }
-
-    fn compute_load_last_stores(
-        func: &Function,
-        block_input: SecondaryMap<Block, Option<LastStores>>,
-    ) -> FxHashMap<Inst, PackedMemoryState> {
-        let mut load_mem_state = FxHashMap::default();
-        load_mem_state.reserve(func.dfg.num_insts() / 8);
-
-        for block in func.layout.blocks() {
-            let mut state = block_input[block].clone().unwrap();
-
-            for inst in func.layout.block_insts(block) {
-                trace!(
-                    "alias analysis: scanning at {} with state {:?} ({:?})",
-                    inst,
-                    state,
-                    func.dfg[inst],
-                );
-
-                // N.B.: we match `Load` specifically, and not any
-                // other kinds of loads (or any opcode such that
-                // `opcode.can_load()` returns true), because some
-                // "can load" instructions actually have very
-                // different semantics (are not just a load of a
-                // particularly-typed value). For example, atomic
-                // (load/store, RMW, CAS) instructions "can load" but
-                // definitely should not participate in store-to-load
-                // forwarding or redundant-load elimination. Our goal
-                // here is to provide a `MemoryState` just for plain
-                // old loads whose semantics we can completely reason
-                // about.
-                if let InstructionData::Load {
-                    opcode: Opcode::Load,
-                    flags,
-                    ..
-                } = func.dfg[inst]
-                {
-                    let mem_state = *state.for_flags(flags);
-                    trace!(
-                        "alias analysis: at {}: load with mem_state {:?}",
-                        inst,
-                        mem_state,
-                    );
-
-                    load_mem_state.insert(inst, mem_state.into());
-                }
-
-                state.update(func, inst);
-            }
-        }
-
-        load_mem_state
-    }
-
-    /// Get the state seen by a load, if any.
-    pub fn get_state_for_load(&self, inst: Inst) -> Option<PackedMemoryState> {
-        self.load_mem_state.get(&inst).copied()
-    }
-}
diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs
index 76245722f5..b67f110137 100644
--- a/cranelift/codegen/src/inst_predicates.rs
+++ b/cranelift/codegen/src/inst_predicates.rs
@@ -45,6 +45,35 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
     trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
 }
 
+/// Does the given instruction behave as a "pure" node with respect to
+/// aegraph semantics?
+///
+/// - Actual pure nodes (arithmetic, etc)
+/// - Loads with both the `readonly` and `notrap` flags set
+pub fn is_pure_for_egraph(func: &Function, inst: Inst) -> bool {
+    let is_readonly_load = match func.dfg[inst] {
+        InstructionData::Load {
+            opcode: Opcode::Load,
+            flags,
+            ..
+        } => flags.readonly() && flags.notrap(),
+        _ => false,
+    };
+    // Multi-value results do not play nicely with much of the egraph
+    // infrastructure. They are in practice used only for multi-return
+    // calls and some other odd instructions (e.g. iadd_cout) which,
+    // for now, we can afford to leave in place as opaque
+    // side-effecting ops. So if more than one result, then the inst
+    // is "not pure". Similarly, ops with zero results can be used
+    // only for their side-effects, so are never pure. (Or if they
+    // are, we can always trivially eliminate them with no effect.)
+    let has_one_result = func.dfg.inst_results(inst).len() == 1;
+
+    let op = func.dfg[inst].opcode();
+
+    has_one_result && (is_readonly_load || (!op.can_load() && !trivially_has_side_effects(op)))
+}
+
 /// Does the given instruction have any side-effect as per [has_side_effect], or else is a load,
 /// but not the get_pinned_reg opcode?
 pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool {
diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs
index 48eb4fa910..877fc2cb83 100644
--- a/cranelift/codegen/src/ir/dfg.rs
+++ b/cranelift/codegen/src/ir/dfg.rs
@@ -125,23 +125,6 @@ impl DataFlowGraph {
         self.immediates.clear();
     }
 
-    /// Clear all instructions, but keep blocks and other metadata
-    /// (signatures, constants, immediates). Everything to do with
-    /// `Value`s is cleared, including block params and debug info.
-    ///
-    /// Used during egraph-based optimization to clear out the pre-opt
-    /// body so that we can regenerate it from the egraph.
-    pub(crate) fn clear_insts(&mut self) {
-        self.insts.clear();
-        self.results.clear();
-        self.value_lists.clear();
-        self.values.clear();
-        self.values_labels = None;
-        for block in self.blocks.values_mut() {
-            block.params = ValueList::new();
-        }
-    }
-
     /// Get the total number of instructions created in this function, whether they are currently
     /// inserted in the layout or not.
     ///
@@ -173,6 +156,11 @@ impl DataFlowGraph {
         self.values.len()
     }
 
+    /// Iterate over every value, yielding it paired with its definition.
+    pub fn values_and_defs(&self) -> impl Iterator<Item = (Value, ValueDef)> + '_ {
+        self.values().map(|v| (v, self.value_def(v)))
+    }
+
     /// Starts collection of debug information.
     pub fn collect_debug_info(&mut self) {
         if self.values_labels.is_none() {
@@ -279,12 +267,6 @@ impl DataFlowGraph {
         self.values[v].ty()
     }
 
-    /// Fill in the type of a value, only if currently invalid (as a placeholder).
-    pub(crate) fn fill_in_value_type(&mut self, v: Value, ty: Type) {
-        debug_assert!(self.values[v].ty().is_invalid() || self.values[v].ty() == ty);
-        self.values[v].set_type(ty);
-    }
-
     /// Get the definition of a value.
     ///
     /// This is either the instruction that defined it or the Block that has the value as an
@@ -298,6 +280,7 @@ impl DataFlowGraph {
                 // detect alias loops without overrunning the stack.
                 self.value_def(self.resolve_aliases(original))
             }
+            ValueData::Union { x, y, .. } => ValueDef::Union(x, y),
         }
     }
 
@@ -313,6 +296,7 @@ impl DataFlowGraph {
             Inst { inst, num, .. } => Some(&v) == self.inst_results(inst).get(num as usize),
             Param { block, num, .. } => Some(&v) == self.block_params(block).get(num as usize),
             Alias { .. } => false,
+            Union { .. } => false,
         }
     }
 
@@ -422,6 +406,8 @@ pub enum ValueDef {
     Result(Inst, usize),
     /// Value is the n'th parameter to a block.
     Param(Block, usize),
+    /// Value is a union of two other values.
+    Union(Value, Value),
 }
 
 impl ValueDef {
@@ -458,6 +444,7 @@ impl ValueDef {
     pub fn num(self) -> usize {
         match self {
             Self::Result(_, n) | Self::Param(_, n) => n,
+            Self::Union(_, _) => 0,
         }
     }
 }
@@ -476,6 +463,11 @@ enum ValueData {
     /// An alias value can't be linked as an instruction result or block parameter. It is used as a
     /// placeholder when the original instruction or block has been rewritten or modified.
     Alias { ty: Type, original: Value },
+
+    /// Union is a "fork" in representation: the value can be
+    /// represented as either of the values named here. This is used
+    /// for aegraph (acyclic egraph) representation in the DFG.
+    Union { ty: Type, x: Value, y: Value },
 }
 
 /// Bit-packed version of ValueData, for efficiency.
@@ -483,40 +475,71 @@ enum ValueData {
 /// Layout:
 ///
 /// ```plain
-///        | tag:2 |  type:14        |    num:16       | index:32          |
+///        | tag:2 |  type:14        |    x:24       | y:24          |
+///
+/// Inst       00     ty               inst output     inst index
+/// Param      01     ty               blockparam num  block index
+/// Alias      10     ty               0               value index
+/// Union      11     ty               first value     second value
 /// ```
 #[derive(Clone, Copy, Debug, PartialEq, Hash)]
 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 struct ValueDataPacked(u64);
 
+/// Encodes a value in 0..2^32 into 0..2^n, where n is less than 32
+/// (and is given by `bits`), by translating 2^32-1 (0xffffffff)
+/// into 2^n-1 and panic'ing (in debug builds) on 2^n-1..2^32-1.
+fn encode_narrow_field(x: u32, bits: u8) -> u32 {
+    if x == 0xffff_ffff {
+        (1 << bits) - 1
+    } else {
+        debug_assert!(x < (1 << bits) - 1);
+        x
+    }
+}
+
+/// Undoes `encode_narrow_field`: the all-ones n-bit pattern (2^n-1)
+/// becomes 2^32-1 again; every other value is returned unchanged.
+fn decode_narrow_field(x: u32, bits: u8) -> u32 {
+    let narrow_max = (1 << bits) - 1;
+    match x {
+        v if v == narrow_max => 0xffff_ffff,
+        v => v,
+    }
+}
+
 impl ValueDataPacked {
-    const INDEX_SHIFT: u64 = 0;
-    const INDEX_BITS: u64 = 32;
-    const NUM_SHIFT: u64 = Self::INDEX_SHIFT + Self::INDEX_BITS;
-    const NUM_BITS: u64 = 16;
-    const TYPE_SHIFT: u64 = Self::NUM_SHIFT + Self::NUM_BITS;
-    const TYPE_BITS: u64 = 14;
-    const TAG_SHIFT: u64 = Self::TYPE_SHIFT + Self::TYPE_BITS;
-    const TAG_BITS: u64 = 2;
+    const Y_SHIFT: u8 = 0;
+    const Y_BITS: u8 = 24;
+    const X_SHIFT: u8 = Self::Y_SHIFT + Self::Y_BITS;
+    const X_BITS: u8 = 24;
+    const TYPE_SHIFT: u8 = Self::X_SHIFT + Self::X_BITS;
+    const TYPE_BITS: u8 = 14;
+    const TAG_SHIFT: u8 = Self::TYPE_SHIFT + Self::TYPE_BITS;
+    const TAG_BITS: u8 = 2;
 
-    const TAG_INST: u64 = 1;
-    const TAG_PARAM: u64 = 2;
-    const TAG_ALIAS: u64 = 3;
+    const TAG_INST: u64 = 0;
+    const TAG_PARAM: u64 = 1;
+    const TAG_ALIAS: u64 = 2;
+    const TAG_UNION: u64 = 3;
 
-    fn make(tag: u64, ty: Type, num: u16, index: u32) -> ValueDataPacked {
+    fn make(tag: u64, ty: Type, x: u32, y: u32) -> ValueDataPacked {
         debug_assert!(tag < (1 << Self::TAG_BITS));
         debug_assert!(ty.repr() < (1 << Self::TYPE_BITS));
 
+        let x = encode_narrow_field(x, Self::X_BITS);
+        let y = encode_narrow_field(y, Self::Y_BITS);
+
         ValueDataPacked(
             (tag << Self::TAG_SHIFT)
                 | ((ty.repr() as u64) << Self::TYPE_SHIFT)
-                | ((num as u64) << Self::NUM_SHIFT)
-                | ((index as u64) << Self::INDEX_SHIFT),
+                | ((x as u64) << Self::X_SHIFT)
+                | ((y as u64) << Self::Y_SHIFT),
         )
     }
 
     #[inline(always)]
-    fn field(self, shift: u64, bits: u64) -> u64 {
+    fn field(self, shift: u8, bits: u8) -> u64 {
         (self.0 >> shift) & ((1 << bits) - 1)
     }
 
@@ -537,14 +560,17 @@ impl From<ValueData> for ValueDataPacked {
     fn from(data: ValueData) -> Self {
         match data {
             ValueData::Inst { ty, num, inst } => {
-                Self::make(Self::TAG_INST, ty, num, inst.as_bits())
+                Self::make(Self::TAG_INST, ty, num.into(), inst.as_bits())
             }
             ValueData::Param { ty, num, block } => {
-                Self::make(Self::TAG_PARAM, ty, num, block.as_bits())
+                Self::make(Self::TAG_PARAM, ty, num.into(), block.as_bits())
             }
             ValueData::Alias { ty, original } => {
                 Self::make(Self::TAG_ALIAS, ty, 0, original.as_bits())
             }
+            ValueData::Union { ty, x, y } => {
+                Self::make(Self::TAG_UNION, ty, x.as_bits(), y.as_bits())
+            }
         }
     }
 }
@@ -552,25 +578,33 @@ impl From<ValueData> for ValueDataPacked {
 impl From<ValueDataPacked> for ValueData {
     fn from(data: ValueDataPacked) -> Self {
         let tag = data.field(ValueDataPacked::TAG_SHIFT, ValueDataPacked::TAG_BITS);
-        let ty = data.field(ValueDataPacked::TYPE_SHIFT, ValueDataPacked::TYPE_BITS) as u16;
-        let num = data.field(ValueDataPacked::NUM_SHIFT, ValueDataPacked::NUM_BITS) as u16;
-        let index = data.field(ValueDataPacked::INDEX_SHIFT, ValueDataPacked::INDEX_BITS) as u32;
+        let ty = u16::try_from(data.field(ValueDataPacked::TYPE_SHIFT, ValueDataPacked::TYPE_BITS))
+            .expect("Mask should ensure result fits in a u16");
+        let x = u32::try_from(data.field(ValueDataPacked::X_SHIFT, ValueDataPacked::X_BITS))
+            .expect("Mask should ensure result fits in a u32");
+        let y = u32::try_from(data.field(ValueDataPacked::Y_SHIFT, ValueDataPacked::Y_BITS))
+            .expect("Mask should ensure result fits in a u32");
 
         let ty = Type::from_repr(ty);
         match tag {
             ValueDataPacked::TAG_INST => ValueData::Inst {
                 ty,
-                num,
-                inst: Inst::from_bits(index),
+                num: u16::try_from(x).expect("Inst result num should fit in u16"),
+                inst: Inst::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)),
             },
             ValueDataPacked::TAG_PARAM => ValueData::Param {
                 ty,
-                num,
-                block: Block::from_bits(index),
+                num: u16::try_from(x).expect("Blockparam index should fit in u16"),
+                block: Block::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)),
             },
             ValueDataPacked::TAG_ALIAS => ValueData::Alias {
                 ty,
-                original: Value::from_bits(index),
+                original: Value::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)),
+            },
+            ValueDataPacked::TAG_UNION => ValueData::Union {
+                ty,
+                x: Value::from_bits(decode_narrow_field(x, ValueDataPacked::X_BITS)),
+                y: Value::from_bits(decode_narrow_field(y, ValueDataPacked::Y_BITS)),
             },
             _ => panic!("Invalid tag {} in ValueDataPacked 0x{:x}", tag, data.0),
         }
@@ -582,8 +616,11 @@ impl From<ValueDataPacked> for ValueData {
 impl DataFlowGraph {
     /// Create a new instruction.
     ///
-    /// The type of the first result is indicated by `data.ty`. If the instruction produces
-    /// multiple results, also call `make_inst_results` to allocate value table entries.
+    /// The type of the first result is indicated by `data.ty`. If the
+    /// instruction produces multiple results, also call
+    /// `make_inst_results` to allocate value table entries. (It is
+    /// always safe to call `make_inst_results`, regardless of how
+    /// many results the instruction has.)
     pub fn make_inst(&mut self, data: InstructionData) -> Inst {
         let n = self.num_insts() + 1;
         self.results.resize(n);
@@ -608,6 +645,7 @@ impl DataFlowGraph {
         match self.value_def(value) {
             ir::ValueDef::Result(inst, _) => self.display_inst(inst),
             ir::ValueDef::Param(_, _) => panic!("value is not defined by an instruction"),
+            ir::ValueDef::Union(_, _) => panic!("value is a union of two other values"),
         }
     }
 
@@ -823,6 +861,19 @@ impl DataFlowGraph {
         self.insts[inst].put_value_list(branch_values)
     }
 
+    /// Clone an instruction, attaching fresh result `Value`s to the
+    /// copy, and return the newly created `Inst`.
+    pub fn clone_inst(&mut self, inst: Inst) -> Inst {
+        // The copy uses the original's controlling type variable.
+        let ctrl_typevar = self.ctrl_typevar(inst);
+        // Duplicate the InstructionData under a new instruction.
+        let cloned_data = self[inst].clone();
+        let new_inst = self.make_inst(cloned_data);
+        // Allocate result values for the copy.
+        self.make_inst_results(new_inst, ctrl_typevar);
+        new_inst
+    }
+
     /// Get the first result of an instruction.
     ///
     /// This function panics if the instruction doesn't have any result.
@@ -847,6 +898,14 @@ impl DataFlowGraph {
         self.results[inst]
     }
 
+    /// Create a union of two values, returning the union `Value`.
+    pub fn union(&mut self, x: Value, y: Value) -> Value {
+        // The union takes `x`'s type; `y` must match (debug-checked).
+        let ty = self.value_type(x);
+        debug_assert_eq!(ty, self.value_type(y));
+        self.make_value(ValueData::Union { ty, x, y })
+    }
+
     /// Get the call signature of a direct or indirect call instruction.
     /// Returns `None` if `inst` is not a call instruction.
     pub fn call_signature(&self, inst: Inst) -> Option<SigRef> {
diff --git a/cranelift/codegen/src/ir/layout.rs b/cranelift/codegen/src/ir/layout.rs
index 819c332d45..7162c848c5 100644
--- a/cranelift/codegen/src/ir/layout.rs
+++ b/cranelift/codegen/src/ir/layout.rs
@@ -61,18 +61,6 @@ impl Layout {
         self.last_block = None;
     }
 
-    /// Clear instructions from every block, but keep the blocks.
-    ///
-    /// Used by the egraph-based optimization to clear out the
-    /// function body but keep the CFG skeleton.
-    pub(crate) fn clear_insts(&mut self) {
-        self.insts.clear();
-        for block in self.blocks.values_mut() {
-            block.first_inst = None.into();
-            block.last_inst = None.into();
-        }
-    }
-
     /// Returns the capacity of the `BlockData` map.
     pub fn block_capacity(&self) -> usize {
         self.blocks.capacity()
diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs
index 23f952738e..3858a56917 100644
--- a/cranelift/codegen/src/ir/mod.rs
+++ b/cranelift/codegen/src/ir/mod.rs
@@ -48,7 +48,7 @@ pub use crate::ir::function::{DisplayFunctionAnnotations, Function};
 pub use crate::ir::globalvalue::GlobalValueData;
 pub use crate::ir::heap::{HeapData, HeapStyle};
 pub use crate::ir::instructions::{
-    InstructionData, InstructionImms, Opcode, ValueList, ValueListPool, VariableArgs,
+    InstructionData, Opcode, ValueList, ValueListPool, VariableArgs,
 };
 pub use crate::ir::jumptable::JumpTableData;
 pub use crate::ir::known_symbol::KnownSymbol;
diff --git a/cranelift/codegen/src/ir/progpoint.rs b/cranelift/codegen/src/ir/progpoint.rs
index 0152949e7a..39c4d98fbe 100644
--- a/cranelift/codegen/src/ir/progpoint.rs
+++ b/cranelift/codegen/src/ir/progpoint.rs
@@ -37,6 +37,7 @@ impl From<ValueDef> for ProgramPoint {
         match def {
             ValueDef::Result(inst, _) => inst.into(),
             ValueDef::Param(block, _) => block.into(),
+            ValueDef::Union(_, _) => panic!("Union does not have a single program point"),
         }
     }
 }
@@ -78,6 +79,7 @@ impl From<ValueDef> for ExpandedProgramPoint {
         match def {
             ValueDef::Result(inst, _) => inst.into(),
             ValueDef::Param(block, _) => block.into(),
+            ValueDef::Union(_, _) => panic!("Union does not have a single program point"),
         }
     }
 }
diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs
index bdee39cbd8..97460f9a1a 100644
--- a/cranelift/codegen/src/isle_prelude.rs
+++ b/cranelift/codegen/src/isle_prelude.rs
@@ -585,5 +585,27 @@ macro_rules! isle_common_prelude_methods {
                 | IntCC::SignedLessThan => Some(*cc),
             }
         }
+
+        #[inline]
+        fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
+            // Copy the two elements out by position.
+            (arr[0], arr[1])
+        }
+
+        #[inline]
+        fn pack_value_array_2(&mut self, a: Value, b: Value) -> ValueArray2 {
+            [a, b]
+        }
+
+        #[inline]
+        fn unpack_value_array_3(&mut self, arr: &ValueArray3) -> (Value, Value, Value) {
+            // Copy the three elements out by position.
+            (arr[0], arr[1], arr[2])
+        }
+
+        #[inline]
+        fn pack_value_array_3(&mut self, a: Value, b: Value, c: Value) -> ValueArray3 {
+            [a, b, c]
+        }
     };
 }
diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs
index 3c2ffad5c3..621b9b572f 100644
--- a/cranelift/codegen/src/lib.rs
+++ b/cranelift/codegen/src/lib.rs
@@ -95,6 +95,7 @@ mod alias_analysis;
 mod bitset;
 mod constant_hash;
 mod context;
+mod ctxhash;
 mod dce;
 mod divconst_magic_numbers;
 mod egraph;
@@ -111,6 +112,7 @@ mod result;
 mod scoped_hash_map;
 mod simple_gvn;
 mod simple_preopt;
+mod unionfind;
 mod unreachable_code;
 mod value_label;
 
diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs
index be6d5e588e..f93e6ce87e 100644
--- a/cranelift/codegen/src/loop_analysis.rs
+++ b/cranelift/codegen/src/loop_analysis.rs
@@ -37,7 +37,7 @@ struct LoopData {
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct LoopLevel(u8);
 impl LoopLevel {
-    const INVALID: u8 = 0xff;
+    const INVALID: u8 = u8::MAX;
 
     /// Get the root level (no loop).
     pub fn root() -> Self {
diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs
index ab1bc3bb7a..ee1c2e1214 100644
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -56,25 +56,8 @@ macro_rules! isle_lower_prelude_methods {
         }
 
         #[inline]
-        fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
-            let [a, b] = *arr;
-            (a, b)
-        }
-
-        #[inline]
-        fn pack_value_array_2(&mut self, a: Value, b: Value) -> ValueArray2 {
-            [a, b]
-        }
-
-        #[inline]
-        fn unpack_value_array_3(&mut self, arr: &ValueArray3) -> (Value, Value, Value) {
-            let [a, b, c] = *arr;
-            (a, b, c)
-        }
-
-        #[inline]
-        fn pack_value_array_3(&mut self, a: Value, b: Value, c: Value) -> ValueArray3 {
-            [a, b, c]
+        fn value_type(&mut self, val: Value) -> Type {
+            self.lower_ctx.dfg().value_type(val)
         }
 
         #[inline]
@@ -230,11 +213,6 @@ macro_rules! isle_lower_prelude_methods {
             self.lower_ctx.dfg()[inst]
         }
 
-        #[inline]
-        fn value_type(&mut self, val: Value) -> Type {
-            self.lower_ctx.dfg().value_type(val)
-        }
-
         #[inline]
         fn def_inst(&mut self, val: Value) -> Option<Inst> {
             self.lower_ctx.dfg().value_def(val).inst()
diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs
index 7e77370e29..8f0ca0c9ee 100644
--- a/cranelift/codegen/src/opts.rs
+++ b/cranelift/codegen/src/opts.rs
@@ -1,308 +1,131 @@
 //! Optimization driver using ISLE rewrite rules on an egraph.
 
-use crate::egraph::Analysis;
-use crate::egraph::FuncEGraph;
-pub use crate::egraph::{Node, NodeCtx};
+use crate::egraph::{NewOrExistingInst, OptimizeCtx};
 use crate::ir::condcodes;
 pub use crate::ir::condcodes::{FloatCC, IntCC};
+use crate::ir::dfg::ValueDef;
 pub use crate::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, Uimm8};
 pub use crate::ir::types::*;
 pub use crate::ir::{
-    dynamic_to_fixed, AtomicRmwOp, Block, Constant, DynamicStackSlot, FuncRef, GlobalValue, Heap,
-    HeapImm, Immediate, InstructionImms, JumpTable, MemFlags, Opcode, StackSlot, Table, TrapCode,
-    Type, Value,
+    dynamic_to_fixed, AtomicRmwOp, Block, Constant, DataFlowGraph, DynamicStackSlot, FuncRef,
+    GlobalValue, Heap, HeapImm, Immediate, InstructionData, JumpTable, MemFlags, Opcode, StackSlot,
+    Table, TrapCode, Type, Value,
 };
 use crate::isle_common_prelude_methods;
 use crate::machinst::isle::*;
 use crate::trace;
-pub use cranelift_egraph::{Id, NewOrExisting, NodeIter};
-use cranelift_entity::{EntityList, EntityRef};
-use smallvec::SmallVec;
+use cranelift_entity::packed_option::ReservedValue;
+use smallvec::{smallvec, SmallVec};
 use std::marker::PhantomData;
 
-pub type IdArray = EntityList<Id>;
 #[allow(dead_code)]
 pub type Unit = ();
 pub type Range = (usize, usize);
+pub type ValueArray2 = [Value; 2];
+pub type ValueArray3 = [Value; 3];
 
 pub type ConstructorVec<T> = SmallVec<[T; 8]>;
 
-mod generated_code;
+pub(crate) mod generated_code;
 use generated_code::ContextIter;
 
-struct IsleContext<'a, 'b> {
-    egraph: &'a mut FuncEGraph<'b>,
+pub(crate) struct IsleContext<'a, 'b, 'c> {
+    pub(crate) ctx: &'a mut OptimizeCtx<'b, 'c>,
 }
 
-const REWRITE_LIMIT: usize = 5;
-
-pub fn optimize_eclass<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id {
-    trace!("running rules on eclass {}", id.index());
-    egraph.stats.rewrite_rule_invoked += 1;
-
-    if egraph.rewrite_depth > REWRITE_LIMIT {
-        egraph.stats.rewrite_depth_limit += 1;
-        return id;
-    }
-    egraph.rewrite_depth += 1;
-
-    // Find all possible rewrites and union them in, returning the
-    // union.
-    let mut ctx = IsleContext { egraph };
-    let optimized_ids = generated_code::constructor_simplify(&mut ctx, id);
-    let mut union_id = id;
-    if let Some(mut ids) = optimized_ids {
-        while let Some(new_id) = ids.next(&mut ctx) {
-            if ctx.egraph.subsume_ids.contains(&new_id) {
-                trace!(" -> eclass {} subsumes {}", new_id, id);
-                ctx.egraph.stats.node_subsume += 1;
-                // Merge in the unionfind so canonicalization still
-                // works, but take *only* the subsuming ID, and break
-                // now.
-                ctx.egraph.egraph.unionfind.union(union_id, new_id);
-                union_id = new_id;
-                break;
-            }
-            ctx.egraph.stats.node_union += 1;
-            let old_union_id = union_id;
-            union_id = ctx
-                .egraph
-                .egraph
-                .union(&ctx.egraph.node_ctx, union_id, new_id);
-            trace!(
-                " -> union eclass {} with {} to get {}",
-                new_id,
-                old_union_id,
-                union_id
-            );
-        }
-    }
-    trace!(" -> optimize {} got {}", id, union_id);
-    ctx.egraph.rewrite_depth -= 1;
-    union_id
-}
-
-pub(crate) fn store_to_load<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id {
-    // Note that we only examine the latest enode in the eclass: opts
-    // are invoked for every new enode added to an eclass, so
-    // traversing the whole eclass would be redundant.
-    let load_key = egraph.egraph.classes[id].get_node().unwrap();
-    if let Node::Load {
-        op:
-            InstructionImms::Load {
-                opcode: Opcode::Load,
-                offset: load_offset,
-                ..
-            },
-        ty: load_ty,
-        addr: load_addr,
-        mem_state,
-        ..
-    } = load_key.node(&egraph.egraph.nodes)
-    {
-        if let Some(store_inst) = mem_state.as_store() {
-            trace!(" -> got load op for id {}", id);
-            if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) {
-                trace!(" -> got store id: {} ty: {}", store_id, store_ty);
-                let store_key = egraph.egraph.classes[*store_id].get_node().unwrap();
-                if let Node::Inst {
-                    op:
-                        InstructionImms::Store {
-                            opcode: Opcode::Store,
-                            offset: store_offset,
-                            ..
-                        },
-                    args: store_args,
-                    ..
-                } = store_key.node(&egraph.egraph.nodes)
-                {
-                    let store_args = store_args.as_slice(&egraph.node_ctx.args);
-                    let store_data = store_args[0];
-                    let store_addr = store_args[1];
-                    if *load_offset == *store_offset
-                        && *load_ty == *store_ty
-                        && egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr)
-                    {
-                        trace!(" -> same offset, type, address; forwarding");
-                        egraph.stats.store_to_load_forward += 1;
-                        return store_data;
-                    }
-                }
-            }
-        }
-    }
-
-    id
-}
-
-struct NodesEtorIter<'a, 'b>
-where
-    'b: 'a,
-{
-    root: Id,
-    iter: NodeIter<NodeCtx, Analysis>,
+pub(crate) struct InstDataEtorIter<'a, 'b, 'c> {
+    stack: SmallVec<[Value; 8]>,
     _phantom1: PhantomData<&'a ()>,
     _phantom2: PhantomData<&'b ()>,
+    _phantom3: PhantomData<&'c ()>,
 }
-
-impl<'a, 'b> generated_code::ContextIter for NodesEtorIter<'a, 'b>
-where
-    'b: 'a,
-{
-    type Context = IsleContext<'a, 'b>;
-    type Output = (Type, InstructionImms, IdArray);
-
-    fn next(&mut self, ctx: &mut IsleContext<'a, 'b>) -> Option<Self::Output> {
-        while let Some(node) = self.iter.next(&ctx.egraph.egraph) {
-            trace!("iter from root {}: node {:?}", self.root, node);
-            match node {
-                Node::Pure {
-                    op,
-                    args,
-                    ty,
-                    arity,
-                }
-                | Node::Inst {
-                    op,
-                    args,
-                    ty,
-                    arity,
-                    ..
-                } if *arity == 1 => {
-                    return Some((*ty, op.clone(), args.clone()));
-                }
-                _ => {}
-            }
-        }
-        None
-    }
-}
-
-impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> {
-    isle_common_prelude_methods!();
-
-    fn eclass_type(&mut self, eclass: Id) -> Option<Type> {
-        let mut iter = self.egraph.egraph.enodes(eclass);
-        while let Some(node) = iter.next(&self.egraph.egraph) {
-            match node {
-                &Node::Pure { ty, arity, .. } | &Node::Inst { ty, arity, .. } if arity == 1 => {
-                    return Some(ty);
-                }
-                &Node::Load { ty, .. } => return Some(ty),
-                &Node::Result { ty, .. } => return Some(ty),
-                &Node::Param { ty, .. } => return Some(ty),
-                _ => {}
-            }
-        }
-        None
-    }
-
-    fn at_loop_level(&mut self, eclass: Id) -> (u8, Id) {
-        (
-            self.egraph.egraph.analysis_value(eclass).loop_level.level() as u8,
-            eclass,
-        )
-    }
-
-    type enodes_etor_iter = NodesEtorIter<'a, 'b>;
-
-    fn enodes_etor(&mut self, eclass: Id) -> Option<NodesEtorIter<'a, 'b>> {
-        Some(NodesEtorIter {
-            root: eclass,
-            iter: self.egraph.egraph.enodes(eclass),
+impl<'a, 'b, 'c> InstDataEtorIter<'a, 'b, 'c> {
+    fn new(root: Value) -> Self {
+        debug_assert_ne!(root, Value::reserved_value());
+        Self {
+            stack: smallvec![root],
             _phantom1: PhantomData,
             _phantom2: PhantomData,
-        })
-    }
-
-    fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id {
-        let op = op.clone();
-        match self.egraph.egraph.add(
-            Node::Pure {
-                op,
-                args,
-                ty,
-                arity: 1,
-            },
-            &mut self.egraph.node_ctx,
-        ) {
-            NewOrExisting::New(id) => {
-                self.egraph.stats.node_created += 1;
-                self.egraph.stats.node_pure += 1;
-                self.egraph.stats.node_ctor_created += 1;
-                optimize_eclass(id, self.egraph)
-            }
-            NewOrExisting::Existing(id) => {
-                self.egraph.stats.node_ctor_deduped += 1;
-                id
-            }
+            _phantom3: PhantomData,
         }
     }
-
-    fn id_array_0_etor(&mut self, arg0: IdArray) -> Option<()> {
-        let values = arg0.as_slice(&self.egraph.node_ctx.args);
-        if values.len() == 0 {
-            Some(())
-        } else {
-            None
-        }
-    }
-
-    fn id_array_0_ctor(&mut self) -> IdArray {
-        EntityList::default()
-    }
-
-    fn id_array_1_etor(&mut self, arg0: IdArray) -> Option<Id> {
-        let values = arg0.as_slice(&self.egraph.node_ctx.args);
-        if values.len() == 1 {
-            Some(values[0])
-        } else {
-            None
-        }
-    }
-
-    fn id_array_1_ctor(&mut self, arg0: Id) -> IdArray {
-        EntityList::from_iter([arg0].into_iter(), &mut self.egraph.node_ctx.args)
-    }
-
-    fn id_array_2_etor(&mut self, arg0: IdArray) -> Option<(Id, Id)> {
-        let values = arg0.as_slice(&self.egraph.node_ctx.args);
-        if values.len() == 2 {
-            Some((values[0], values[1]))
-        } else {
-            None
-        }
-    }
-
-    fn id_array_2_ctor(&mut self, arg0: Id, arg1: Id) -> IdArray {
-        EntityList::from_iter([arg0, arg1].into_iter(), &mut self.egraph.node_ctx.args)
-    }
-
-    fn id_array_3_etor(&mut self, arg0: IdArray) -> Option<(Id, Id, Id)> {
-        let values = arg0.as_slice(&self.egraph.node_ctx.args);
-        if values.len() == 3 {
-            Some((values[0], values[1], values[2]))
-        } else {
-            None
-        }
-    }
-
-    fn id_array_3_ctor(&mut self, arg0: Id, arg1: Id, arg2: Id) -> IdArray {
-        EntityList::from_iter(
-            [arg0, arg1, arg2].into_iter(),
-            &mut self.egraph.node_ctx.args,
-        )
-    }
-
-    fn remat(&mut self, id: Id) -> Id {
-        trace!("remat: {}", id);
-        self.egraph.remat_ids.insert(id);
-        id
-    }
-
-    fn subsume(&mut self, id: Id) -> Id {
-        trace!("subsume: {}", id);
-        self.egraph.subsume_ids.insert(id);
-        id
-    }
+}
+
+impl<'a, 'b, 'c> ContextIter for InstDataEtorIter<'a, 'b, 'c>
+where
+    'b: 'a,
+    'c: 'b,
+{
+    type Context = IsleContext<'a, 'b, 'c>;
+    type Output = (Type, InstructionData);
+
+    fn next(&mut self, ctx: &mut IsleContext<'a, 'b, 'c>) -> Option<Self::Output> {
+        let dfg = &ctx.ctx.func.dfg;
+        while let Some(candidate) = self.stack.pop() {
+            debug_assert_ne!(candidate, Value::reserved_value());
+            let value = dfg.resolve_aliases(candidate);
+            trace!("iter: value {:?}", value);
+            match dfg.value_def(value) {
+                ValueDef::Union(x, y) => {
+                    debug_assert_ne!(x, Value::reserved_value());
+                    debug_assert_ne!(y, Value::reserved_value());
+                    trace!(" -> {}, {}", x, y);
+                    // Queue both sides of the union for a later iteration.
+                    self.stack.extend([x, y]);
+                }
+                ValueDef::Result(inst, _) if dfg.inst_results(inst).len() == 1 => {
+                    let ty = dfg.value_type(value);
+                    trace!(" -> value of type {}", ty);
+                    return Some((ty, dfg[inst].clone()));
+                }
+                _ => {}
+            }
+        }
+        None
+    }
+}
+
+impl<'a, 'b, 'c> generated_code::Context for IsleContext<'a, 'b, 'c> {
+    isle_common_prelude_methods!();
+
+    type inst_data_etor_iter = InstDataEtorIter<'a, 'b, 'c>;
+
+    fn inst_data_etor(&mut self, eclass: Value) -> Option<InstDataEtorIter<'a, 'b, 'c>> {
+        Some(InstDataEtorIter::new(eclass))
+    }
+
+    fn make_inst_ctor(&mut self, ty: Type, op: &InstructionData) -> Value {
+        // Describe the instruction to insert, then insert it as a pure enode.
+        let new_inst = NewOrExistingInst::New(op.clone(), ty);
+        let value = self.ctx.insert_pure_enode(new_inst);
+        trace!("make_inst_ctor: {:?} -> {}", op, value);
+        value
+    }
+
+    fn value_array_2_ctor(&mut self, arg0: Value, arg1: Value) -> ValueArray2 {
+        [arg0, arg1]
+    }
+
+    fn value_array_3_ctor(&mut self, arg0: Value, arg1: Value, arg2: Value) -> ValueArray3 {
+        [arg0, arg1, arg2]
+    }
+
+    #[inline]
+    fn value_type(&mut self, val: Value) -> Type {
+        self.ctx.func.dfg.value_type(val)
+    }
+
+    fn remat(&mut self, value: Value) -> Value {
+        trace!("remat: {}", value);
+        self.ctx.stats.remat += 1;
+        self.ctx.remat_values.insert(value);
+        value
+    }
+
+    fn subsume(&mut self, value: Value) -> Value {
+        trace!("subsume: {}", value);
+        self.ctx.stats.subsume += 1;
+        self.ctx.subsume_values.insert(value);
+        value
+    }
+}
diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle
index 9a75b3d6b3..caed553ba7 100644
--- a/cranelift/codegen/src/opts/algebraic.isle
+++ b/cranelift/codegen/src/opts/algebraic.isle
@@ -145,31 +145,15 @@
       (iadd ty x x))
 
 ;; x<<32>>32: uextend/sextend 32->64.
-(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
+(rule (simplify (ushr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
       (uextend $I64 x))
 
-(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (eclass_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
+(rule (simplify (sshr $I64 (ishl $I64 (uextend $I64 x @ (value_type $I32)) (iconst _ (simm32 32))) (iconst _ (simm32 32))))
       (sextend $I64 x))
 
 ;; TODO: strength reduction: mul/div to shifts
 ;; TODO: div/rem by constants -> magic multiplications
 
-;; Reassociate when it benefits LICM.
-(rule (simplify (iadd ty (iadd ty x y) z))
-      (if-let (at_loop_level lx _) x)
-      (if-let (at_loop_level ly _) y)
-      (if-let (at_loop_level lz _) z)
-      (if (u8_lt lx ly))
-      (if (u8_lt lz ly))
-      (iadd ty (iadd ty x z) y))
-(rule (simplify (iadd ty (iadd ty x y) z))
-      (if-let (at_loop_level lx _) x)
-      (if-let (at_loop_level ly _) y)
-      (if-let (at_loop_level lz _) z)
-      (if (u8_lt ly lx))
-      (if (u8_lt lz lx))
-      (iadd ty (iadd ty y z) x))
-
 ;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
 ;; used. This is neutral (add-with-imm) or positive (iconst) for
 ;; register pressure, and these ops are very cheap.
diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle
index e3573bcc3a..ef4e8c28fd 100644
--- a/cranelift/codegen/src/opts/cprop.isle
+++ b/cranelift/codegen/src/opts/cprop.isle
@@ -107,7 +107,7 @@
 (rule (simplify (isub ty
                       (iadd ty x (iconst ty (u64_from_imm64 k1)))
                       (iconst ty (u64_from_imm64 k2))))
-      (isub ty x (iconst ty (imm64 (u64_sub k1 k2)))))
+      (isub ty x (iconst ty (imm64 (u64_sub k2 k1)))))
 (rule (simplify (iadd ty
                       (isub ty x (iconst ty (u64_from_imm64 k1)))
                       (iconst ty (u64_from_imm64 k2))))
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index ca9a307d28..68d06ddf24 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -32,6 +32,15 @@
 
 ;; `cranelift-entity`-based identifiers.
 (type Type (primitive Type))
+(type Value (primitive Value))
+(type ValueList (primitive ValueList))
+
+;; ISLE representation of `&[Value]`.
+(type ValueSlice (primitive ValueSlice))
+
+;; Extract the type of a `Value`.
+(decl value_type (Type) Value)
+(extern extractor infallible value_type value_type)
 
 (decl u32_add (u32 u32) u32)
 (extern constructor u32_add u32_add)
diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle
index 597ddb0043..b35ca78c28 100644
--- a/cranelift/codegen/src/prelude_lower.isle
+++ b/cranelift/codegen/src/prelude_lower.isle
@@ -5,15 +5,10 @@
 
 ;; `cranelift-entity`-based identifiers.
 (type Inst (primitive Inst))
-(type Value (primitive Value))
-
-;; ISLE representation of `&[Value]`.
-(type ValueSlice (primitive ValueSlice))
 
 ;; ISLE representation of `Vec<u8>`
 (type VecMask extern (enum))
 
-(type ValueList (primitive ValueList))
 (type ValueRegs (primitive ValueRegs))
 (type WritableValueRegs (primitive WritableValueRegs))
 
@@ -214,10 +209,6 @@
 (decl inst_data (InstructionData) Inst)
 (extern extractor infallible inst_data inst_data)
 
-;; Extract the type of a `Value`.
-(decl value_type (Type) Value)
-(extern extractor infallible value_type value_type)
-
 ;; Extract the type of the instruction's first result.
 (decl result_type (Type) Inst)
 (extractor (result_type ty)
diff --git a/cranelift/codegen/src/prelude_opt.isle b/cranelift/codegen/src/prelude_opt.isle
index 46baaddd13..d3fc0d1bb4 100644
--- a/cranelift/codegen/src/prelude_opt.isle
+++ b/cranelift/codegen/src/prelude_opt.isle
@@ -2,60 +2,33 @@
 
 ;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; An eclass ID.
-(type Id (primitive Id))
-
-;; What is the type of an eclass (if a single type)?
-(decl eclass_type (Type) Id)
-(extern extractor eclass_type eclass_type)
-
-;; Helper to wrap an Id-matching pattern and extract type.
-(decl has_type (Type Id) Id)
-(extractor (has_type ty id)
-           (and (eclass_type ty)
-                id))
-
 ;; Extract any node(s) for the given eclass ID.
-(decl multi enodes (Type InstructionImms IdArray) Id)
-(extern extractor enodes enodes_etor)
+(decl multi inst_data (Type InstructionData) Value)
+(extern extractor inst_data inst_data_etor)
 
 ;; Construct a pure node, returning a new (or deduplicated
 ;; already-existing) eclass ID.
-(decl pure_enode (Type InstructionImms IdArray) Id)
-(extern constructor pure_enode pure_enode_ctor)
+(decl make_inst (Type InstructionData) Value)
+(extern constructor make_inst make_inst_ctor)
 
-;; Type of an Id slice (for args).
-(type IdArray (primitive IdArray))
-
-(decl id_array_0 () IdArray)
-(extern constructor id_array_0 id_array_0_ctor)
-(extern extractor id_array_0 id_array_0_etor)
-(decl id_array_1 (Id) IdArray)
-(extern constructor id_array_1 id_array_1_ctor)
-(extern extractor id_array_1 id_array_1_etor)
-(decl id_array_2 (Id Id) IdArray)
-(extern constructor id_array_2 id_array_2_ctor)
-(extern extractor id_array_2 id_array_2_etor)
-(decl id_array_3 (Id Id Id) IdArray)
-(extern constructor id_array_3 id_array_3_ctor)
-(extern extractor id_array_3 id_array_3_etor)
-
-;; Extractor to get the min loop-level of an eclass.
-(decl at_loop_level (u8 Id) Id)
-(extern extractor infallible at_loop_level at_loop_level)
+;; Constructors for value arrays.
+(decl value_array_2_ctor (Value Value) ValueArray2)
+(extern constructor value_array_2_ctor value_array_2_ctor)
+(decl value_array_3_ctor (Value Value Value) ValueArray3)
+(extern constructor value_array_3_ctor value_array_3_ctor)
 
 ;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; The main matcher rule invoked by the toplevel driver.
-(decl multi simplify (Id) Id)
+(decl multi simplify (Value) Value)
 
 ;; Mark a node as requiring remat when used in a different block.
-(decl remat (Id) Id)
+(decl remat (Value) Value)
 (extern constructor remat remat)
 
 ;; Mark a node as subsuming whatever else it's rewritten from -- this
 ;; is definitely preferable, not just a possible option. Useful for,
 ;; e.g., constant propagation where we arrive at a definite "final
 ;; answer".
-(decl subsume (Id) Id)
+(decl subsume (Value) Value)
 (extern constructor subsume subsume)
diff --git a/cranelift/codegen/src/simple_gvn.rs b/cranelift/codegen/src/simple_gvn.rs
index 327ff8f4bd..87dc4d1a96 100644
--- a/cranelift/codegen/src/simple_gvn.rs
+++ b/cranelift/codegen/src/simple_gvn.rs
@@ -39,14 +39,14 @@ struct HashKey<'a, 'f: 'a> {
 impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> {
     fn hash<H: Hasher>(&self, state: &mut H) {
         let pool = &self.pos.borrow().func.dfg.value_lists;
-        self.inst.hash(state, pool);
+        self.inst.hash(state, pool, |value| value);
         self.ty.hash(state);
     }
 }
 impl<'a, 'f: 'a> PartialEq for HashKey<'a, 'f> {
     fn eq(&self, other: &Self) -> bool {
         let pool = &self.pos.borrow().func.dfg.value_lists;
-        self.inst.eq(&other.inst, pool) && self.ty == other.ty
+        self.inst.eq(&other.inst, pool, |value| value) && self.ty == other.ty
     }
 }
 impl<'a, 'f: 'a> Eq for HashKey<'a, 'f> {}
diff --git a/cranelift/codegen/src/unionfind.rs b/cranelift/codegen/src/unionfind.rs
new file mode 100644
index 0000000000..b6c534aa5f
--- /dev/null
+++ b/cranelift/codegen/src/unionfind.rs
@@ -0,0 +1,74 @@
+//! Simple union-find data structure.
+
+use crate::trace;
+use cranelift_entity::{packed_option::ReservedValue, EntityRef, SecondaryMap};
+use std::hash::Hash;
+
+/// A union-find data structure over entity indices. Existing `Idx`s
+/// are registered with `add`; their eclasses can then be merged.
+#[derive(Clone, Debug, PartialEq)]
+pub struct UnionFind<Idx: EntityRef> {
+    parent: SecondaryMap<Idx, Val<Idx>>,
+}
+
+#[derive(Clone, Debug, PartialEq)]
+struct Val<Idx>(Idx);
+impl<Idx: EntityRef + ReservedValue> Default for Val<Idx> {
+    fn default() -> Self {
+        Self(Idx::reserved_value())
+    }
+}
+
+impl<Idx: EntityRef + Hash + std::fmt::Display + Ord + ReservedValue> UnionFind<Idx> {
+    /// Create an empty `UnionFind` backed by a `SecondaryMap` of capacity `cap`.
+    pub fn with_capacity(cap: usize) -> Self {
+        UnionFind {
+            parent: SecondaryMap::with_capacity(cap),
+        }
+    }
+
+    /// Register `id` as the root of its own singleton equivalence class
+    /// (its parent is set to itself). Every `Idx` must be added before
+    /// it is queried or unioned; `id` must not be the reserved value.
+    pub fn add(&mut self, id: Idx) {
+        debug_assert!(id != Idx::reserved_value());
+        self.parent[id] = Val(id);
+    }
+
+    /// Find the canonical `Idx` of `node` without modifying the data structure.
+    pub fn find(&self, mut node: Idx) -> Idx {
+        while node != self.parent[node].0 {
+            node = self.parent[node].0;
+        }
+        node
+    }
+
+    /// Find the canonical `Idx` of a given `Idx`, shortening the parent
+    /// chain along the way: each visited node is re-pointed at its
+    /// grandparent, so later queries for this `Idx` (and others on its
+    /// chain up to the root of the equivalence class) are faster.
+    pub fn find_and_update(&mut self, mut node: Idx) -> Idx {
+        // "Path halving" mutating find (Tarjan and Van Leeuwen): hop to the grandparent.
+        debug_assert!(node != Idx::reserved_value());
+        while node != self.parent[node].0 {
+            let next = self.parent[self.parent[node].0].0;
+            debug_assert!(next != Idx::reserved_value());
+            self.parent[node] = Val(next);
+            node = next;
+        }
+        debug_assert!(node != Idx::reserved_value());
+        node
+    }
+
+    /// Merge the equivalence classes of `a` and `b`; the lower root stays canonical.
+    pub fn union(&mut self, a: Idx, b: Idx) {
+        let a = self.find_and_update(a);
+        let b = self.find_and_update(b);
+        let (a, b) = (std::cmp::min(a, b), std::cmp::max(a, b));
+        if a != b {
+            // Always canonicalize toward lower IDs: the larger root points at the smaller.
+            self.parent[b] = Val(a);
+            trace!("union: {}, {}", a, b);
+        }
+    }
+}
diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs
index 535fe0d99e..b19523b7ef 100644
--- a/cranelift/codegen/src/verifier/mod.rs
+++ b/cranelift/codegen/src/verifier/mod.rs
@@ -1041,6 +1041,10 @@ impl<'a> Verifier<'a> {
                     ));
                 }
             }
+            ValueDef::Union(_, _) => {
+                // Nothing: union nodes themselves have no location,
+                // so we cannot check any dominance properties.
+            }
         }
         Ok(())
     }
@@ -1070,6 +1074,11 @@ impl<'a> Verifier<'a> {
                 self.context(loc_inst),
                 format!("instruction result {} is not defined by the instruction", v),
             )),
+            ValueDef::Union(_, _) => errors.fatal((
+                loc_inst,
+                self.context(loc_inst),
+                format!("instruction result {} is a union node", v),
+            )),
         }
     }
 
diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs
index 58d73e4ba4..552cc8549b 100644
--- a/cranelift/codegen/src/write.rs
+++ b/cranelift/codegen/src/write.rs
@@ -298,6 +298,7 @@ fn type_suffix(func: &Function, inst: Inst) -> Option<Type> {
         let def_block = match func.dfg.value_def(ctrl_var) {
             ValueDef::Result(instr, _) => func.layout.inst_block(instr),
             ValueDef::Param(block, _) => Some(block),
+            ValueDef::Union(..) => None,
         };
         if def_block.is_some() && def_block == func.layout.inst_block(inst) {
             return None;
diff --git a/cranelift/egraph/Cargo.toml b/cranelift/egraph/Cargo.toml
deleted file mode 100644
index 7d591a187f..0000000000
--- a/cranelift/egraph/Cargo.toml
+++ /dev/null
@@ -1,24 +0,0 @@
-[package]
-authors = ["The Cranelift Project Developers"]
-name = "cranelift-egraph"
-version = "0.92.0"
-description = "acyclic-egraph (aegraph) implementation for Cranelift"
-license = "Apache-2.0 WITH LLVM-exception"
-documentation = "https://docs.rs/cranelift-egraph"
-repository = "https://github.com/bytecodealliance/wasmtime"
-edition = "2021"
-
-[dependencies]
-cranelift-entity = { workspace = true }
-log = { workspace = true }
-smallvec = { workspace = true }
-indexmap = { version = "1.9.1" }
-hashbrown = { version = "0.12.2", features = ["raw"] }
-fxhash = "0.2.1"
-
-[features]
-default = []
-
-# Enable detailed trace-level debug logging. Excluded by default to
-# omit the dynamic overhead of checking the logging level.
-trace-log = []
diff --git a/cranelift/egraph/src/bumpvec.rs b/cranelift/egraph/src/bumpvec.rs
deleted file mode 100644
index 7c8d210cb9..0000000000
--- a/cranelift/egraph/src/bumpvec.rs
+++ /dev/null
@@ -1,524 +0,0 @@
-//! Vectors allocated in arenas, with small per-vector overhead.
-
-use std::marker::PhantomData;
-use std::mem::MaybeUninit;
-use std::ops::Range;
-
-/// A vector of `T` stored within a `BumpArena`.
-///
-/// This is something like a normal `Vec`, except that all accesses
-/// and updates require a separate borrow of the `BumpArena`. This, in
-/// turn, makes the Vec itself very compact: only three `u32`s (12
-/// bytes). The `BumpSlice` variant is only two `u32`s (8 bytes) and
-/// is sufficient to reconstruct a slice, but not grow the vector.
-///
-/// The `BumpVec` does *not* implement `Clone` or `Copy`; it
-/// represents unique ownership of a range of indices in the arena. If
-/// dropped, those indices will be unavailable until the arena is
-/// freed. This is "fine" (it is normally how arena allocation
-/// works). To explicitly free and make available for some
-/// allocations, a very rudimentary reuse mechanism exists via
-/// `BumpVec::free(arena)`. (The allocation path opportunistically
-/// checks the first range on the freelist, and can carve off a piece
-/// of it if larger than needed, but it does not attempt to traverse
-/// the entire freelist; this is a compromise between bump-allocation
-/// speed and memory efficiency, which also influences speed through
-/// cached-memory reuse.)
-///
-/// The type `T` should not have a `Drop` implementation. This
-/// typically means that it does not own any boxed memory,
-/// sub-collections, or other resources. This is important for the
-/// efficiency of the data structure (otherwise, to call `Drop` impls,
-/// the arena needs to track which indices are live or dead; the
-/// BumpVec itself cannot do the drop because it does not retain a
-/// reference to the arena). Note that placing a `T` with a `Drop`
-/// impl in the arena is still *safe*, because leaking (that is, never
-/// calling `Drop::drop()`) is safe. It is merely less efficient, and
-/// so should be avoided if possible.
-#[derive(Debug)]
-pub struct BumpVec<T> {
-    base: u32,
-    len: u32,
-    cap: u32,
-    _phantom: PhantomData<T>,
-}
-
-/// A slice in an arena: like a `BumpVec`, but has a fixed size that
-/// cannot grow. The size of this struct is one 32-bit word smaller
-/// than `BumpVec`. It is copyable/cloneable because it will never be
-/// freed.
-#[derive(Debug, Clone, Copy)]
-pub struct BumpSlice<T> {
-    base: u32,
-    len: u32,
-    _phantom: PhantomData<T>,
-}
-
-#[derive(Default)]
-pub struct BumpArena<T> {
-    vec: Vec<MaybeUninit<T>>,
-    freelist: Vec<Range<u32>>,
-}
-
-impl<T> BumpArena<T> {
-    /// Create a new arena into which one can allocate `BumpVec`s.
-    pub fn new() -> Self {
-        Self {
-            vec: vec![],
-            freelist: vec![],
-        }
-    }
-
-    /// Create a new arena, pre-allocating space for `cap` total `T`
-    /// elements.
-    pub fn arena_with_capacity(cap: usize) -> Self {
-        Self {
-            vec: Vec::with_capacity(cap),
-            freelist: Vec::with_capacity(cap / 16),
-        }
-    }
-
-    /// Create a new `BumpVec` with the given pre-allocated capacity
-    /// and zero length.
-    pub fn vec_with_capacity(&mut self, cap: usize) -> BumpVec<T> {
-        let cap = u32::try_from(cap).unwrap();
-        if let Some(range) = self.maybe_freelist_alloc(cap) {
-            BumpVec {
-                base: range.start,
-                len: 0,
-                cap,
-                _phantom: PhantomData,
-            }
-        } else {
-            let base = self.vec.len() as u32;
-            for _ in 0..cap {
-                self.vec.push(MaybeUninit::uninit());
-            }
-            BumpVec {
-                base,
-                len: 0,
-                cap,
-                _phantom: PhantomData,
-            }
-        }
-    }
-
-    /// Create a new `BumpVec` with a single element. The capacity is
-    /// also only one element; growing the vector further will require
-    /// a reallocation.
-    pub fn single(&mut self, t: T) -> BumpVec<T> {
-        let mut vec = self.vec_with_capacity(1);
-        unsafe {
-            self.write_into_index(vec.base, t);
-        }
-        vec.len = 1;
-        vec
-    }
-
-    /// Create a new `BumpVec` with the sequence from an iterator.
-    pub fn from_iter<I: Iterator<Item = T>>(&mut self, i: I) -> BumpVec<T> {
-        let base = self.vec.len() as u32;
-        self.vec.extend(i.map(|item| MaybeUninit::new(item)));
-        let len = self.vec.len() as u32 - base;
-        BumpVec {
-            base,
-            len,
-            cap: len,
-            _phantom: PhantomData,
-        }
-    }
-
-    /// Append two `BumpVec`s, returning a new one. Consumes both
-    /// vectors. This will use the capacity at the end of `a` if
-    /// possible to move `b`'s elements into place; otherwise it will
-    /// need to allocate new space.
-    pub fn append(&mut self, a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
-        if (a.cap - a.len) >= b.len {
-            self.append_into_cap(a, b)
-        } else {
-            self.append_into_new(a, b)
-        }
-    }
-
-    /// Helper: read the `T` out of a given arena index. After
-    /// reading, that index becomes uninitialized.
-    unsafe fn read_out_of_index(&self, index: u32) -> T {
-        // Note that we don't actually *track* uninitialized status
-        // (and this is fine because we will never `Drop` and we never
-        // allow a `BumpVec` to refer to an uninitialized index, so
-        // the bits are effectively dead). We simply read the bits out
-        // and return them.
-        self.vec[index as usize].as_ptr().read()
-    }
-
-    /// Helper: write a `T` into the given arena index. Index must
-    /// have been uninitialized previously.
-    unsafe fn write_into_index(&mut self, index: u32, t: T) {
-        self.vec[index as usize].as_mut_ptr().write(t);
-    }
-
-    /// Helper: move a `T` from one index to another. Old index
-    /// becomes uninitialized and new index must have previously been
-    /// uninitialized.
-    unsafe fn move_item(&mut self, from: u32, to: u32) {
-        let item = self.read_out_of_index(from);
-        self.write_into_index(to, item);
-    }
-
-    /// Helper: push a `T` onto the end of the arena, growing its
-    /// storage. The `T` to push is read out of another index, and
-    /// that index subsequently becomes uninitialized.
-    unsafe fn push_item(&mut self, from: u32) -> u32 {
-        let index = self.vec.len() as u32;
-        let item = self.read_out_of_index(from);
-        self.vec.push(MaybeUninit::new(item));
-        index
-    }
-
-    /// Helper: append `b` into the capacity at the end of `a`.
-    fn append_into_cap(&mut self, mut a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
-        debug_assert!(a.cap - a.len >= b.len);
-        for i in 0..b.len {
-            // Safety: initially, the indices in `b` are initialized;
-            // the indices in `a`'s cap, beyond its length, are
-            // uninitialized. We move the initialized contents from
-            // `b` to the tail beyond `a`, and we consume `b` (so it
-            // no longer exists), and we update `a`'s length to cover
-            // the initialized contents in their new location.
-            unsafe {
-                self.move_item(b.base + i, a.base + a.len + i);
-            }
-        }
-        a.len += b.len;
-        b.free(self);
-        a
-    }
-
-    /// Helper: return a range of indices that are available
-    /// (uninitialized) according to the freelist for `len` elements,
-    /// if possible.
-    fn maybe_freelist_alloc(&mut self, len: u32) -> Option<Range<u32>> {
-        if let Some(entry) = self.freelist.last_mut() {
-            if entry.len() >= len as usize {
-                let base = entry.start;
-                entry.start += len;
-                if entry.start == entry.end {
-                    self.freelist.pop();
-                }
-                return Some(base..(base + len));
-            }
-        }
-        None
-    }
-
-    /// Helper: append `a` and `b` into a completely new allocation.
-    fn append_into_new(&mut self, a: BumpVec<T>, b: BumpVec<T>) -> BumpVec<T> {
-        // New capacity: round up to a power of two.
-        let len = a.len + b.len;
-        let cap = round_up_power_of_two(len);
-
-        if let Some(range) = self.maybe_freelist_alloc(cap) {
-            for i in 0..a.len {
-                // Safety: the indices in `a` must be initialized. We read
-                // out the item and copy it to a new index; the old index
-                // is no longer covered by a BumpVec, because we consume
-                // `a`.
-                unsafe {
-                    self.move_item(a.base + i, range.start + i);
-                }
-            }
-            for i in 0..b.len {
-                // Safety: the indices in `b` must be initialized. We read
-                // out the item and copy it to a new index; the old index
-                // is no longer covered by a BumpVec, because we consume
-                // `b`.
-                unsafe {
-                    self.move_item(b.base + i, range.start + a.len + i);
-                }
-            }
-
-            a.free(self);
-            b.free(self);
-
-            BumpVec {
-                base: range.start,
-                len,
-                cap,
-                _phantom: PhantomData,
-            }
-        } else {
-            self.vec.reserve(cap as usize);
-            let base = self.vec.len() as u32;
-            for i in 0..a.len {
-                // Safety: the indices in `a` must be initialized. We read
-                // out the item and copy it to a new index; the old index
-                // is no longer covered by a BumpVec, because we consume
-                // `a`.
-                unsafe {
-                    self.push_item(a.base + i);
-                }
-            }
-            for i in 0..b.len {
-                // Safety: the indices in `b` must be initialized. We read
-                // out the item and copy it to a new index; the old index
-                // is no longer covered by a BumpVec, because we consume
-                // `b`.
-                unsafe {
-                    self.push_item(b.base + i);
-                }
-            }
-            let len = self.vec.len() as u32 - base;
-
-            for _ in len..cap {
-                self.vec.push(MaybeUninit::uninit());
-            }
-
-            a.free(self);
-            b.free(self);
-
-            BumpVec {
-                base,
-                len,
-                cap,
-                _phantom: PhantomData,
-            }
-        }
-    }
-
-    /// Returns the size of the backing `Vec`.
-    pub fn size(&self) -> usize {
-        self.vec.len()
-    }
-}
-
-fn round_up_power_of_two(x: u32) -> u32 {
-    debug_assert!(x > 0);
-    debug_assert!(x < 0x8000_0000);
-    let log2 = 32 - (x - 1).leading_zeros();
-    1 << log2
-}
-
-impl<T> BumpVec<T> {
-    /// Returns a slice view of this `BumpVec`, given a borrow of the
-    /// arena.
-    pub fn as_slice<'a>(&'a self, arena: &'a BumpArena<T>) -> &'a [T] {
-        let maybe_uninit_slice =
-            &arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
-        // Safety: the index range we represent must be initialized.
-        unsafe { std::mem::transmute(maybe_uninit_slice) }
-    }
-
-    /// Returns a mutable slice view of this `BumpVec`, given a
-    /// mutable borrow of the arena.
-    pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena<T>) -> &'a mut [T] {
-        let maybe_uninit_slice =
-            &mut arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
-        // Safety: the index range we represent must be initialized.
-        unsafe { std::mem::transmute(maybe_uninit_slice) }
-    }
-
-    /// Returns the length of this vector. Does not require access to
-    /// the arena.
-    pub fn len(&self) -> usize {
-        self.len as usize
-    }
-
-    /// Returns the capacity of this vector. Does not require access
-    /// to the arena.
-    pub fn cap(&self) -> usize {
-        self.cap as usize
-    }
-
-    /// Reserve `extra_len` capacity at the end of the vector,
-    /// reallocating if necessary.
-    pub fn reserve(&mut self, extra_len: usize, arena: &mut BumpArena<T>) {
-        let extra_len = u32::try_from(extra_len).unwrap();
-        if self.cap - self.len < extra_len {
-            if self.base + self.cap == arena.vec.len() as u32 {
-                for _ in 0..extra_len {
-                    arena.vec.push(MaybeUninit::uninit());
-                }
-                self.cap += extra_len;
-            } else {
-                let new_cap = self.cap + extra_len;
-                let new = arena.vec_with_capacity(new_cap as usize);
-                unsafe {
-                    for i in 0..self.len {
-                        arena.move_item(self.base + i, new.base + i);
-                    }
-                }
-                self.base = new.base;
-                self.cap = new.cap;
-            }
-        }
-    }
-
-    /// Push an item, growing the capacity if needed.
-    pub fn push(&mut self, t: T, arena: &mut BumpArena<T>) {
-        if self.cap > self.len {
-            unsafe {
-                arena.write_into_index(self.base + self.len, t);
-            }
-            self.len += 1;
-        } else if (self.base + self.cap) as usize == arena.vec.len() {
-            arena.vec.push(MaybeUninit::new(t));
-            self.cap += 1;
-            self.len += 1;
-        } else {
-            let new_cap = round_up_power_of_two(self.cap + 1);
-            let extra = new_cap - self.cap;
-            self.reserve(extra as usize, arena);
-            unsafe {
-                arena.write_into_index(self.base + self.len, t);
-            }
-            self.len += 1;
-        }
-    }
-
-    /// Clone, if `T` is cloneable.
-    pub fn clone(&self, arena: &mut BumpArena<T>) -> BumpVec<T>
-    where
-        T: Clone,
-    {
-        let mut new = arena.vec_with_capacity(self.len as usize);
-        for i in 0..self.len {
-            let item = self.as_slice(arena)[i as usize].clone();
-            new.push(item, arena);
-        }
-        new
-    }
-
-    /// Truncate the length to a smaller-or-equal length.
-    pub fn truncate(&mut self, len: usize) {
-        let len = len as u32;
-        assert!(len <= self.len);
-        self.len = len;
-    }
-
-    /// Consume the BumpVec and return its indices to a free pool in
-    /// the arena.
-    pub fn free(self, arena: &mut BumpArena<T>) {
-        arena.freelist.push(self.base..(self.base + self.cap));
-    }
-
-    /// Freeze the capacity of this BumpVec, turning it into a slice,
-    /// for a smaller struct (8 bytes rather than 12). Once this
-    /// exists, it is copyable, because the slice will never be freed.
-    pub fn freeze(self, arena: &mut BumpArena<T>) -> BumpSlice<T> {
-        if self.cap > self.len {
-            arena
-                .freelist
-                .push((self.base + self.len)..(self.base + self.cap));
-        }
-        BumpSlice {
-            base: self.base,
-            len: self.len,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T> BumpSlice<T> {
-    /// Returns a slice view of the `BumpSlice`, given a borrow of the
-    /// arena.
-    pub fn as_slice<'a>(&'a self, arena: &'a BumpArena<T>) -> &'a [T] {
-        let maybe_uninit_slice =
-            &arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
-        // Safety: the index range we represent must be initialized.
-        unsafe { std::mem::transmute(maybe_uninit_slice) }
-    }
-
-    /// Returns a mutable slice view of the `BumpSlice`, given a
-    /// mutable borrow of the arena.
-    pub fn as_mut_slice<'a>(&'a mut self, arena: &'a mut BumpArena<T>) -> &'a mut [T] {
-        let maybe_uninit_slice =
-            &mut arena.vec[(self.base as usize)..((self.base + self.len) as usize)];
-        // Safety: the index range we represent must be initialized.
-        unsafe { std::mem::transmute(maybe_uninit_slice) }
-    }
-
-    /// Returns the length of the `BumpSlice`.
-    pub fn len(&self) -> usize {
-        self.len as usize
-    }
-}
-
-impl<T> std::default::Default for BumpVec<T> {
-    fn default() -> Self {
-        BumpVec {
-            base: 0,
-            len: 0,
-            cap: 0,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T> std::default::Default for BumpSlice<T> {
-    fn default() -> Self {
-        BumpSlice {
-            base: 0,
-            len: 0,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn test_round_up() {
-        assert_eq!(1, round_up_power_of_two(1));
-        assert_eq!(2, round_up_power_of_two(2));
-        assert_eq!(4, round_up_power_of_two(3));
-        assert_eq!(4, round_up_power_of_two(4));
-        assert_eq!(32, round_up_power_of_two(24));
-        assert_eq!(0x8000_0000, round_up_power_of_two(0x7fff_ffff));
-    }
-
-    #[test]
-    fn test_basic() {
-        let mut arena: BumpArena<u32> = BumpArena::new();
-
-        let a = arena.single(1);
-        let b = arena.single(2);
-        let c = arena.single(3);
-        let ab = arena.append(a, b);
-        assert_eq!(ab.as_slice(&arena), &[1, 2]);
-        assert_eq!(ab.cap(), 2);
-        let abc = arena.append(ab, c);
-        assert_eq!(abc.len(), 3);
-        assert_eq!(abc.cap(), 4);
-        assert_eq!(abc.as_slice(&arena), &[1, 2, 3]);
-        assert_eq!(arena.size(), 9);
-        let mut d = arena.single(4);
-        // Should have reused the freelist.
-        assert_eq!(arena.size(), 9);
-        assert_eq!(d.len(), 1);
-        assert_eq!(d.cap(), 1);
-        assert_eq!(d.as_slice(&arena), &[4]);
-        d.as_mut_slice(&mut arena)[0] = 5;
-        assert_eq!(d.as_slice(&arena), &[5]);
-        abc.free(&mut arena);
-        let d2 = d.clone(&mut arena);
-        let dd = arena.append(d, d2);
-        // Should have reused the freelist.
-        assert_eq!(arena.size(), 9);
-        assert_eq!(dd.as_slice(&arena), &[5, 5]);
-        let mut e = arena.from_iter([10, 11, 12].into_iter());
-        e.push(13, &mut arena);
-        assert_eq!(arena.size(), 13);
-        e.reserve(4, &mut arena);
-        assert_eq!(arena.size(), 17);
-        let _f = arena.from_iter([1, 2, 3, 4, 5, 6, 7, 8].into_iter());
-        assert_eq!(arena.size(), 25);
-        e.reserve(8, &mut arena);
-        assert_eq!(e.cap(), 16);
-        assert_eq!(e.as_slice(&arena), &[10, 11, 12, 13]);
-        // `e` must have been copied now that `f` is at the end of the
-        // arena.
-        assert_eq!(arena.size(), 41);
-    }
-}
diff --git a/cranelift/egraph/src/ctxhash.rs b/cranelift/egraph/src/ctxhash.rs
deleted file mode 100644
index f70086a68c..0000000000
--- a/cranelift/egraph/src/ctxhash.rs
+++ /dev/null
@@ -1,281 +0,0 @@
-//! A hashmap with "external hashing": nodes are hashed or compared for
-//! equality only with some external context provided on lookup/insert.
-//! This allows very memory-efficient data structures where
-//! node-internal data references some other storage (e.g., offsets into
-//! an array or pool of shared data).
-
-use super::unionfind::UnionFind;
-use hashbrown::raw::{Bucket, RawTable};
-use std::hash::{Hash, Hasher};
-use std::marker::PhantomData;
-
-/// Trait that allows for equality comparison given some external
-/// context.
-///
-/// Note that this trait is implemented by the *context*, rather than
-/// the item type, for somewhat complex lifetime reasons (lack of GATs
-/// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
-/// the value type).
-///
-/// Furthermore, the `ctx_eq` method includes a `UnionFind` parameter,
-/// because in practice we require this and a borrow to it cannot be
-/// included in the context type without GATs (similarly to above).
-pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
-    /// Determine whether `a` and `b` are equal, given the context in
-    /// `self` and the union-find data structure `uf`.
-    fn ctx_eq(&self, a: &V1, b: &V2, uf: &mut UnionFind) -> bool;
-}
-
-/// Trait that allows for hashing given some external context.
-pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
-    /// Compute the hash of `value`, given the context in `self` and
-    /// the union-find data structure `uf`.
-    fn ctx_hash(&self, value: &Value, uf: &mut UnionFind) -> u64;
-}
-
-/// A null-comparator context type for underlying value types that
-/// already have `Eq` and `Hash`.
-#[derive(Default)]
-pub struct NullCtx;
-
-impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
-    fn ctx_eq(&self, a: &V, b: &V, _: &mut UnionFind) -> bool {
-        a.eq(b)
-    }
-}
-impl<V: Eq + Hash> CtxHash<V> for NullCtx {
-    fn ctx_hash(&self, value: &V, _: &mut UnionFind) -> u64 {
-        let mut state = fxhash::FxHasher::default();
-        value.hash(&mut state);
-        state.finish()
-    }
-}
-
-/// A bucket in the hash table.
-///
-/// Some performance-related design notes: we cache the hashcode for
-/// speed, as this often buys a few percent speed in
-/// interning-table-heavy workloads. We only keep the low 32 bits of
-/// the hashcode, for memory efficiency: in common use, `K` and `V`
-/// are often 32 bits also, and a 12-byte bucket is measurably better
-/// than a 16-byte bucket.
-struct BucketData<K, V> {
-    hash: u32,
-    k: K,
-    v: V,
-}
-
-/// A HashMap that takes external context for all operations.
-pub struct CtxHashMap<K, V> {
-    raw: RawTable<BucketData<K, V>>,
-}
-
-impl<K, V> CtxHashMap<K, V> {
-    /// Create an empty hashmap.
-    pub fn new() -> Self {
-        Self {
-            raw: RawTable::new(),
-        }
-    }
-
-    /// Create an empty hashmap with pre-allocated space for the given
-    /// capacity.
-    pub fn with_capacity(capacity: usize) -> Self {
-        Self {
-            raw: RawTable::with_capacity(capacity),
-        }
-    }
-}
-
-impl<K, V> CtxHashMap<K, V> {
-    /// Insert a new key-value pair, returning the old value associated
-    /// with this key (if any).
-    pub fn insert<Ctx: CtxEq<K, K> + CtxHash<K>>(
-        &mut self,
-        k: K,
-        v: V,
-        ctx: &Ctx,
-        uf: &mut UnionFind,
-    ) -> Option<V> {
-        let hash = ctx.ctx_hash(&k, uf) as u32;
-        match self.raw.find(hash as u64, |bucket| {
-            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
-        }) {
-            Some(bucket) => {
-                let data = unsafe { bucket.as_mut() };
-                Some(std::mem::replace(&mut data.v, v))
-            }
-            None => {
-                let data = BucketData { hash, k, v };
-                self.raw
-                    .insert_entry(hash as u64, data, |bucket| bucket.hash as u64);
-                None
-            }
-        }
-    }
-
-    /// Look up a key, returning a borrow of the value if present.
-    pub fn get<'a, Q, Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>>(
-        &'a self,
-        k: &Q,
-        ctx: &Ctx,
-        uf: &mut UnionFind,
-    ) -> Option<&'a V> {
-        let hash = ctx.ctx_hash(k, uf) as u32;
-        self.raw
-            .find(hash as u64, |bucket| {
-                hash == bucket.hash && ctx.ctx_eq(&bucket.k, k, uf)
-            })
-            .map(|bucket| {
-                let data = unsafe { bucket.as_ref() };
-                &data.v
-            })
-    }
-
-    /// Return an Entry cursor on a given bucket for a key, allowing
-    /// for fetching the current value or inserting a new one.
-    #[inline(always)]
-    pub fn entry<'a, Ctx: CtxEq<K, K> + CtxHash<K>>(
-        &'a mut self,
-        k: K,
-        ctx: &'a Ctx,
-        uf: &mut UnionFind,
-    ) -> Entry<'a, K, V> {
-        let hash = ctx.ctx_hash(&k, uf) as u32;
-        match self.raw.find(hash as u64, |bucket| {
-            hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k, uf)
-        }) {
-            Some(bucket) => Entry::Occupied(OccupiedEntry {
-                bucket,
-                _phantom: PhantomData,
-            }),
-            None => Entry::Vacant(VacantEntry {
-                raw: &mut self.raw,
-                hash,
-                key: k,
-            }),
-        }
-    }
-}
-
-/// An entry in the hashmap.
-pub enum Entry<'a, K: 'a, V> {
-    Occupied(OccupiedEntry<'a, K, V>),
-    Vacant(VacantEntry<'a, K, V>),
-}
-
-/// An occupied entry.
-pub struct OccupiedEntry<'a, K, V> {
-    bucket: Bucket<BucketData<K, V>>,
-    _phantom: PhantomData<&'a ()>,
-}
-
-impl<'a, K: 'a, V> OccupiedEntry<'a, K, V> {
-    /// Get the value.
-    pub fn get(&self) -> &'a V {
-        let bucket = unsafe { self.bucket.as_ref() };
-        &bucket.v
-    }
-}
-
-/// A vacant entry.
-pub struct VacantEntry<'a, K, V> {
-    raw: &'a mut RawTable<BucketData<K, V>>,
-    hash: u32,
-    key: K,
-}
-
-impl<'a, K, V> VacantEntry<'a, K, V> {
-    /// Insert a value.
-    pub fn insert(self, v: V) -> &'a V {
-        let bucket = self.raw.insert(
-            self.hash as u64,
-            BucketData {
-                hash: self.hash,
-                k: self.key,
-                v,
-            },
-            |bucket| bucket.hash as u64,
-        );
-        let data = unsafe { bucket.as_ref() };
-        &data.v
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    use std::hash::Hash;
-
-    #[derive(Clone, Copy, Debug)]
-    struct Key {
-        index: u32,
-    }
-    struct Ctx {
-        vals: &'static [&'static str],
-    }
-    impl CtxEq<Key, Key> for Ctx {
-        fn ctx_eq(&self, a: &Key, b: &Key, _: &mut UnionFind) -> bool {
-            self.vals[a.index as usize].eq(self.vals[b.index as usize])
-        }
-    }
-    impl CtxHash<Key> for Ctx {
-        fn ctx_hash(&self, value: &Key, _: &mut UnionFind) -> u64 {
-            let mut state = fxhash::FxHasher::default();
-            self.vals[value.index as usize].hash(&mut state);
-            state.finish()
-        }
-    }
-
-    #[test]
-    fn test_basic() {
-        let ctx = Ctx {
-            vals: &["a", "b", "a"],
-        };
-        let mut uf = UnionFind::new();
-
-        let k0 = Key { index: 0 };
-        let k1 = Key { index: 1 };
-        let k2 = Key { index: 2 };
-
-        assert!(ctx.ctx_eq(&k0, &k2, &mut uf));
-        assert!(!ctx.ctx_eq(&k0, &k1, &mut uf));
-        assert!(!ctx.ctx_eq(&k2, &k1, &mut uf));
-
-        let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
-        assert_eq!(map.insert(k0, 42, &ctx, &mut uf), None);
-        assert_eq!(map.insert(k2, 84, &ctx, &mut uf), Some(42));
-        assert_eq!(map.get(&k1, &ctx, &mut uf), None);
-        assert_eq!(*map.get(&k0, &ctx, &mut uf).unwrap(), 84);
-    }
-
-    #[test]
-    fn test_entry() {
-        let mut ctx = Ctx {
-            vals: &["a", "b", "a"],
-        };
-        let mut uf = UnionFind::new();
-
-        let k0 = Key { index: 0 };
-        let k1 = Key { index: 1 };
-        let k2 = Key { index: 2 };
-
-        let mut map: CtxHashMap<Key, u64> = CtxHashMap::new();
-        match map.entry(k0, &mut ctx, &mut uf) {
-            Entry::Vacant(v) => {
-                v.insert(1);
-            }
-            _ => panic!(),
-        }
-        match map.entry(k1, &mut ctx, &mut uf) {
-            Entry::Vacant(_) => {}
-            Entry::Occupied(_) => panic!(),
-        }
-        match map.entry(k2, &mut ctx, &mut uf) {
-            Entry::Occupied(o) => {
-                assert_eq!(*o.get(), 1);
-            }
-            _ => panic!(),
-        }
-    }
-}
diff --git a/cranelift/egraph/src/lib.rs b/cranelift/egraph/src/lib.rs
deleted file mode 100644
index e94416c2f2..0000000000
--- a/cranelift/egraph/src/lib.rs
+++ /dev/null
@@ -1,666 +0,0 @@
-//! # ægraph (aegraph, or acyclic e-graph) implementation.
-//!
-//! An aegraph is a form of e-graph. We will first describe the
-//! e-graph, then the aegraph as a slightly less powerful but highly
-//! optimized variant of it.
-//!
-//! The main goal of this library is to be explicitly memory-efficient
-//! and light on allocations. We need to be as fast and as small as
-//! possible in order to minimize impact on compile time in a
-//! production compiler.
-//!
-//! ## The e-graph
-//!
-//! An e-graph, or equivalence graph, is a kind of node-based
-//! intermediate representation (IR) data structure that consists of
-//! *eclasses* and *enodes*. An eclass contains one or more enodes;
-//! semantically an eclass is like a value, and an enode is one way to
-//! compute that value. If several enodes are in one eclass, the data
-//! structure is asserting that any of these enodes, if evaluated,
-//! would produce the value.
-//!
-//! An e-graph also contains a deduplicating hash-map of nodes, so if
-//! the user creates the same e-node more than once, they get the same
-//! e-class ID.
-//!
-//! In the usual use-case, an e-graph is used to build a sea-of-nodes
-//! IR for a function body or other expression-based code, and then
-//! *rewrite rules* are applied to the e-graph. Each rewrite
-//! potentially introduces a new e-node that is equivalent to an
-//! existing e-node, and then unions the two e-nodes' classes
-//! together.
-//!
-//! In the trivial case this results in an e-class containing a series
-//! of e-nodes that are newly added -- all known forms of an
-//! expression -- but Note how if a rewrite rule rewrites into an
-//! existing e-node (discovered via deduplication), rewriting can
-//! result in unioning of two e-classes that have existed for some
-//! time.
-//!
-//! An e-graph's enodes refer to *classes* for their arguments, rather
-//! than other nodes directly. This is key to the ability of an
-//! e-graph to canonicalize: when two e-classes that are already used
-//! as arguments by other e-nodes are unioned, all e-nodes that refer
-//! to those e-classes are themselves re-canonicalized. This can
-//! result in "cascading" unioning of eclasses, in a process that
-//! discovers the transitive implications of all individual
-//! equalities. This process is known as "equality saturation".
-//!
-//! ## The acyclic e-graph (aegraph)
-//!
-//! An e-graph is powerful, but it can also be expensive to build and
-//! saturate: there are often many different forms an expression can
-//! take (because many different rewrites are possible), and cascading
-//! canonicalization requires heavyweight data structure bookkeeping
-//! that is expensive to maintain.
-//!
-//! This crate introduces the aegraph: an acyclic e-graph. This data
-//! structure stores an e-class as an *immutable persistent data
-//! structure*. An id can refer to some *level* of an eclass: a
-//! snapshot of the nodes in the eclass at one point in time. The
-//! nodes referred to by this id never change, though the eclass may
-//! grow later.
-//!
-//! A *union* is also an operation that creates a new eclass id: the
-//! original eclass IDs refer to the original eclass contents, while
-//! the id resulting from the `union()` operation refers to an eclass
-//! that has all nodes.
-//!
-//! In order to allow for adequate canonicalization, an enode normally
-//! stores the *latest* eclass id for each argument, but computes
-//! hashes and equality using a *canonical* eclass id. We define such
-//! a canonical id with a union-find data structure, just as for a
-//! traditional e-graph. It is normally the lowest id referring to
-//! part of the eclass.
-//!
-//! The persistent/immutable nature of this data structure yields one
-//! extremely important property: it is acyclic! This simplifies
-//! operation greatly:
-//!
-//! - When "elaborating" out of the e-graph back to linearized code,
-//!   so that we can generate machine code, we do not need to break
-//!   cycles. A given enode cannot indirectly refer back to itself.
-//!
-//! - When applying rewrite rules, the nodes visible from a given id
-//!   for an eclass never change. This means that we only need to
-//!   apply rewrite rules at that node id *once*.
-//!
-//! ## Data Structure and Example
-//!
-//! Each eclass id refers to a table entry ("eclass node", which is
-//! different than an "enode") that can be one of:
-//!
-//! - A single enode;
-//! - An enode and an earlier eclass id it is appended to (a "child"
-//!   eclass node);
-//! - A "union node" with two earlier eclass ids.
-//!
-//! Building the aegraph consists solely of adding new entries to the
-//! end of this table of eclass nodes. An enode referenced from any
-//! given eclass node can only refer to earlier eclass ids.
-//!
-//! For example, consider the following eclass table:
-//!
-//! ```plain
-//!
-//!    eclass/enode table
-//!
-//!     eclass1    iconst(1)
-//!     eclass2    blockparam(block0, 0)
-//!     eclass3    iadd(eclass1, eclass2)
-//! ```
-//!
-//! This represents the expression `iadd(blockparam(block0, 0),
-//! iconst(1))` (as the sole enode for eclass3).
-//!
-//! Now, say that as we further build the function body, we add
-//! another enode `iadd(eclass3, iconst(1))`. The `iconst(1)` will be
-//! deduplicated to `eclass1`, and the toplevel `iadd` will become its
-//! own new eclass (`eclass4`).
-//!
-//! ```plain
-//!     eclass4    iadd(eclass3, eclass1)
-//! ```
-//!
-//! Now we apply our body of rewrite rules, and these results can
-//! combine `x + 1 + 1` into `x + 2`; so we get:
-//!
-//! ```plain
-//!     eclass5    iconst(2)
-//!     eclass6    union(iadd(eclass2, eclass5), eclass4)
-//! ```
-//!
-//! Note that we added the nodes for the new expression, and then we
-//! union'd it with the earlier `eclass4`. Logically this represents a
-//! single eclass that contains two nodes -- the `x + 1 + 1` and `x +
-//! 2` representations -- and the *latest* id for the eclass,
-//! `eclass6`, can reach all nodes in the eclass (here the node stored
-//! in `eclass6` and the earlier one in `elcass4`).
-//!
-//! ## aegraph vs. egraph
-//!
-//! Where does an aegraph fall short of an e-graph -- or in other
-//! words, why maintain the data structures to allow for full
-//! (re)canonicalization at all, with e.g. parent pointers to
-//! recursively update parents?
-//!
-//! This question deserves further study, but right now, it appears
-//! that the difference is limited to a case like the following:
-//!
-//! - expression E1 is interned into the aegraph.
-//! - expression E2 is interned into the aegraph. It uses E1 as an
-//!   argument to one or more operators, and so refers to the
-//!   (currently) latest id for E1.
-//! - expression E3 is interned into the aegraph. A rewrite rule fires
-//!   that unions E3 with E1.
-//!
-//! In an e-graph, the last action would trigger a re-canonicalization
-//! of all "parents" (users) of E1; so E2 would be re-canonicalized
-//! using an id that represents the union of E1 and E3. At
-//! code-generation time, E2 could choose to use a value computed by
-//! either E1's or E3's operator. In an aegraph, this is not the case:
-//! E2's e-class and e-nodes are immutable once created, so E2 refers
-//! only to E1's representation of the value (a "slice" of the whole
-//! e-class).
-//!
-//! While at first this sounds quite limiting, there actually appears
-//! to be a nice mutually-beneficial interaction with the immediate
-//! application of rewrite rules: by applying all rewrites we know
-//! about right when E1 is interned, E2 can refer to the best version
-//! when it is created. The above scenario only leads to a missed
-//! optimization if:
-//!
-//! - a rewrite rule exists from E3 to E1, but not E1 to E3; and
-//! - E3 is *cheaper* than E1.
-//!
-//! Or in other words, this only matters if there is a rewrite rule
-//! that rewrites into a more expensive direction. This is unlikely
-//! for the sorts of rewrite rules we plan to write; it may matter
-//! more if many possible equalities are expressed, such as
-//! associativity, commutativity, etc.
-//!
-//! Note that the above represents the best of our understanding, but
-//! there may be cases we have missed; a more complete examination of
-//! this question would involve building a full equality saturation
-//! loop on top of the (a)egraph in this crate, and testing with many
-//! benchmarks to see if it makes any difference.
-//!
-//! ## Rewrite Rules (FLAX: Fast Localized Aegraph eXpansion)
-//!
-//! The most common use of an e-graph or aegraph is to serve as the IR
-//! for a compiler. In this use-case, we usually wish to transform the
-//! program using a body of rewrite rules that represent valid
-//! transformations (equivalent and hopefully simpler ways of
-//! computing results). An aegraph supports applying rules in a fairly
-//! straightforward way: whenever a new eclass entry is added to the
-//! table, we invoke a toplevel "apply all rewrite rules" entry
-//! point. This entry point creates new nodes as needed, and when
-//! done, unions the rewritten nodes with the original. We thus
-//! *immediately* expand a new value into all of its representations.
-//!
-//! This immediate expansion stands in contrast to a traditional
-//! "equality saturation" e-egraph system, in which it is usually best
-//! to apply rules in batches and then fix up the
-//! canonicalization. This approach was introduced in the `egg`
-//! e-graph engine [^1]. We call our system FLAX (because flax is an
-//! alternative to egg): Fast Localized Aegraph eXpansion.
-//!
-//! The reason that this is possible in an aegraph but not
-//! (efficiently, at least) in a traditional e-graph is that the data
-//! structure nodes are immutable once created: an eclass id will
-//! always refer to a fixed set of enodes. There is no
-//! recanonicalizing of eclass arguments as they union; but also this
-//! is not usually necessary, because args will have already been
-//! processed and eagerly rewritten as well. In other words, eager
-//! rewriting and the immutable data structure mutually allow each
-//! other to be practical; both work together.
-//!
-//! [^1]: M Willsey, C Nandi, Y R Wang, O Flatt, Z Tatlock, P
-//!       Panchekha. "egg: Fast and Flexible Equality Saturation." In
-//!       POPL 2021. <https://dl.acm.org/doi/10.1145/3434304>
-
-use cranelift_entity::PrimaryMap;
-use cranelift_entity::{entity_impl, packed_option::ReservedValue, SecondaryMap};
-use smallvec::{smallvec, SmallVec};
-use std::fmt::Debug;
-use std::hash::Hash;
-use std::marker::PhantomData;
-
-mod bumpvec;
-mod ctxhash;
-mod unionfind;
-
-pub use bumpvec::{BumpArena, BumpSlice, BumpVec};
-pub use ctxhash::{CtxEq, CtxHash, CtxHashMap, Entry};
-pub use unionfind::UnionFind;
-
-/// An eclass ID.
-#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub struct Id(u32);
-entity_impl!(Id, "eclass");
-
-impl Id {
-    pub fn invalid() -> Id {
-        Self::reserved_value()
-    }
-}
-impl std::default::Default for Id {
-    fn default() -> Self {
-        Self::invalid()
-    }
-}
-
-/// A trait implemented by all "languages" (types that can be enodes).
-pub trait Language: CtxEq<Self::Node, Self::Node> + CtxHash<Self::Node> {
-    type Node: Debug;
-    fn children<'a>(&'a self, node: &'a Self::Node) -> &'a [Id];
-    fn children_mut<'a>(&'a mut self, ctx: &'a mut Self::Node) -> &'a mut [Id];
-    fn needs_dedup(&self, node: &Self::Node) -> bool;
-}
-
-/// A trait that allows the aegraph to compute a property of each
-/// node as it is created.
-pub trait Analysis {
-    type L: Language;
-    type Value: Clone + Default;
-    fn for_node(
-        &self,
-        ctx: &Self::L,
-        n: &<Self::L as Language>::Node,
-        values: &SecondaryMap<Id, Self::Value>,
-    ) -> Self::Value;
-    fn meet(&self, ctx: &Self::L, v1: &Self::Value, v2: &Self::Value) -> Self::Value;
-}
-
-/// Conditionally-compiled trace-log macro. (Borrowed from
-/// `cranelift-codegen`; it's not worth factoring out a common
-/// subcrate for this.)
-#[macro_export]
-macro_rules! trace {
-    ($($tt:tt)*) => {
-        if cfg!(feature = "trace-log") {
-            ::log::trace!($($tt)*);
-        }
-    };
-}
-
-/// An egraph.
-pub struct EGraph<L: Language, A: Analysis<L = L>> {
-    /// Node-allocation arena.
-    pub nodes: Vec<L::Node>,
-    /// Hash-consing map from Nodes to eclass IDs.
-    node_map: CtxHashMap<NodeKey, Id>,
-    /// Eclass definitions. Each eclass consists of an enode, and
-    /// child pointer to the rest of the eclass.
-    pub classes: PrimaryMap<Id, EClass>,
-    /// Union-find for canonical ID generation. This lets us name an
-    /// eclass with a canonical ID that is the same for all
-    /// generations of the class.
-    pub unionfind: UnionFind,
-    /// Analysis and per-node state.
-    pub analysis: Option<(A, SecondaryMap<Id, A::Value>)>,
-}
-
-/// A reference to a node.
-#[derive(Clone, Copy, Debug)]
-pub struct NodeKey {
-    index: u32,
-}
-
-impl NodeKey {
-    fn from_node_idx(node_idx: usize) -> NodeKey {
-        NodeKey {
-            index: u32::try_from(node_idx).unwrap(),
-        }
-    }
-
-    /// Get the node for this NodeKey, given the `nodes` from the
-    /// appropriate `EGraph`.
-    pub fn node<'a, N>(&self, nodes: &'a [N]) -> &'a N {
-        &nodes[self.index as usize]
-    }
-
-    fn bits(self) -> u32 {
-        self.index
-    }
-
-    fn from_bits(bits: u32) -> Self {
-        NodeKey { index: bits }
-    }
-}
-
-struct NodeKeyCtx<'a, 'b, L: Language> {
-    nodes: &'a [L::Node],
-    node_ctx: &'b L,
-}
-
-impl<'a, 'b, L: Language> CtxEq<NodeKey, NodeKey> for NodeKeyCtx<'a, 'b, L> {
-    fn ctx_eq(&self, a: &NodeKey, b: &NodeKey, uf: &mut UnionFind) -> bool {
-        let a = a.node(self.nodes);
-        let b = b.node(self.nodes);
-        self.node_ctx.ctx_eq(a, b, uf)
-    }
-}
-
-impl<'a, 'b, L: Language> CtxHash<NodeKey> for NodeKeyCtx<'a, 'b, L> {
-    fn ctx_hash(&self, value: &NodeKey, uf: &mut UnionFind) -> u64 {
-        self.node_ctx.ctx_hash(value.node(self.nodes), uf)
-    }
-}
-
-/// An EClass entry. Contains either a single new enode and a child
-/// eclass (i.e., adds one new enode), or unions two child eclasses
-/// together.
-#[derive(Debug, Clone, Copy)]
-pub struct EClass {
-    // formats:
-    //
-    // 00 | unused  (31 bits)        | NodeKey (31 bits)
-    // 01 | eclass_child   (31 bits) | NodeKey (31 bits)
-    // 10 | eclass_child_1 (31 bits) | eclass_child_id_2 (31 bits)
-    bits: u64,
-}
-
-impl EClass {
-    fn node(node: NodeKey) -> EClass {
-        let node_idx = node.bits() as u64;
-        debug_assert!(node_idx < (1 << 31));
-        EClass {
-            bits: (0b00 << 62) | node_idx,
-        }
-    }
-
-    fn node_and_child(node: NodeKey, eclass_child: Id) -> EClass {
-        let node_idx = node.bits() as u64;
-        debug_assert!(node_idx < (1 << 31));
-        debug_assert!(eclass_child != Id::invalid());
-        let child = eclass_child.0 as u64;
-        debug_assert!(child < (1 << 31));
-        EClass {
-            bits: (0b01 << 62) | (child << 31) | node_idx,
-        }
-    }
-
-    fn union(child1: Id, child2: Id) -> EClass {
-        debug_assert!(child1 != Id::invalid());
-        let child1 = child1.0 as u64;
-        debug_assert!(child1 < (1 << 31));
-
-        debug_assert!(child2 != Id::invalid());
-        let child2 = child2.0 as u64;
-        debug_assert!(child2 < (1 << 31));
-
-        EClass {
-            bits: (0b10 << 62) | (child1 << 31) | child2,
-        }
-    }
-
-    /// Get the node, if any, from a node-only or node-and-child
-    /// eclass.
-    pub fn get_node(&self) -> Option<NodeKey> {
-        self.as_node()
-            .or_else(|| self.as_node_and_child().map(|(node, _)| node))
-    }
-
-    /// Get the first child, if any.
-    pub fn child1(&self) -> Option<Id> {
-        self.as_node_and_child()
-            .map(|(_, p1)| p1)
-            .or(self.as_union().map(|(p1, _)| p1))
-    }
-
-    /// Get the second child, if any.
-    pub fn child2(&self) -> Option<Id> {
-        self.as_union().map(|(_, p2)| p2)
-    }
-
-    /// If this EClass is just a lone enode, return it.
-    pub fn as_node(&self) -> Option<NodeKey> {
-        if (self.bits >> 62) == 0b00 {
-            let node_idx = (self.bits & ((1 << 31) - 1)) as u32;
-            Some(NodeKey::from_bits(node_idx))
-        } else {
-            None
-        }
-    }
-
-    /// If this EClass is one new enode and a child, return the node
-    /// and child ID.
-    pub fn as_node_and_child(&self) -> Option<(NodeKey, Id)> {
-        if (self.bits >> 62) == 0b01 {
-            let node_idx = (self.bits & ((1 << 31) - 1)) as u32;
-            let child = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
-            Some((NodeKey::from_bits(node_idx), Id::from_bits(child)))
-        } else {
-            None
-        }
-    }
-
-    /// If this EClass is the union variety, return the two child
-    /// EClasses. Both are guaranteed not to be `Id::invalid()`.
-    pub fn as_union(&self) -> Option<(Id, Id)> {
-        if (self.bits >> 62) == 0b10 {
-            let child1 = ((self.bits >> 31) & ((1 << 31) - 1)) as u32;
-            let child2 = (self.bits & ((1 << 31) - 1)) as u32;
-            Some((Id::from_bits(child1), Id::from_bits(child2)))
-        } else {
-            None
-        }
-    }
-}
-
-/// A new or existing `T` when adding to a deduplicated set or data
-/// structure, like an egraph.
-#[derive(Clone, Copy, Debug)]
-pub enum NewOrExisting<T> {
-    New(T),
-    Existing(T),
-}
-
-impl<T> NewOrExisting<T> {
-    /// Get the underlying value.
-    pub fn get(self) -> T {
-        match self {
-            NewOrExisting::New(t) => t,
-            NewOrExisting::Existing(t) => t,
-        }
-    }
-}
-
-impl<L: Language, A: Analysis<L = L>> EGraph<L, A>
-where
-    L::Node: 'static,
-{
-    /// Create a new aegraph.
-    pub fn new(analysis: Option<A>) -> Self {
-        let analysis = analysis.map(|a| (a, SecondaryMap::new()));
-        Self {
-            nodes: vec![],
-            node_map: CtxHashMap::new(),
-            classes: PrimaryMap::new(),
-            unionfind: UnionFind::new(),
-            analysis,
-        }
-    }
-
-    /// Create a new aegraph with the given capacity.
-    pub fn with_capacity(nodes: usize, analysis: Option<A>) -> Self {
-        let analysis = analysis.map(|a| (a, SecondaryMap::with_capacity(nodes)));
-        Self {
-            nodes: Vec::with_capacity(nodes),
-            node_map: CtxHashMap::with_capacity(nodes),
-            classes: PrimaryMap::with_capacity(nodes),
-            unionfind: UnionFind::with_capacity(nodes),
-            analysis,
-        }
-    }
-
-    /// Add a new node.
-    pub fn add(&mut self, node: L::Node, node_ctx: &L) -> NewOrExisting<Id> {
-        // Push the node. We can then build a NodeKey that refers to
-        // it and look for an existing interned copy. If one exists,
-        // we can pop the pushed node and return the existing Id.
-        let node_idx = self.nodes.len();
-        trace!("adding node: {:?}", node);
-        let needs_dedup = node_ctx.needs_dedup(&node);
-        self.nodes.push(node);
-
-        let key = NodeKey::from_node_idx(node_idx);
-        if needs_dedup {
-            let ctx = NodeKeyCtx {
-                nodes: &self.nodes[..],
-                node_ctx,
-            };
-
-            match self.node_map.entry(key, &ctx, &mut self.unionfind) {
-                Entry::Occupied(o) => {
-                    let eclass_id = *o.get();
-                    self.nodes.pop();
-                    trace!(" -> existing id {}", eclass_id);
-                    NewOrExisting::Existing(eclass_id)
-                }
-                Entry::Vacant(v) => {
-                    // We're creating a new eclass now.
-                    let eclass_id = self.classes.push(EClass::node(key));
-                    trace!(" -> new node and eclass: {}", eclass_id);
-                    self.unionfind.add(eclass_id);
-
-                    // Add to interning map with a NodeKey referring to the eclass.
-                    v.insert(eclass_id);
-
-                    // Update analysis.
-                    let node_ctx = ctx.node_ctx;
-                    self.update_analysis_new(node_ctx, eclass_id, key);
-
-                    NewOrExisting::New(eclass_id)
-                }
-            }
-        } else {
-            let eclass_id = self.classes.push(EClass::node(key));
-            self.unionfind.add(eclass_id);
-            NewOrExisting::New(eclass_id)
-        }
-    }
-
-    /// Merge one eclass into another, maintaining the acyclic
-    /// property (args must have lower eclass Ids than the eclass
-    /// containing the node with those args). Returns the Id of the
-    /// merged eclass.
-    pub fn union(&mut self, ctx: &L, a: Id, b: Id) -> Id {
-        assert_ne!(a, Id::invalid());
-        assert_ne!(b, Id::invalid());
-        let (a, b) = (std::cmp::max(a, b), std::cmp::min(a, b));
-        trace!("union: id {} and id {}", a, b);
-        if a == b {
-            trace!(" -> no-op");
-            return a;
-        }
-
-        self.unionfind.union(a, b);
-
-        // If the younger eclass has no child, we can link it
-        // directly and return that eclass. Otherwise, we create a new
-        // union eclass.
-        if let Some(node) = self.classes[a].as_node() {
-            trace!(
-                " -> id {} is one-node eclass; making into node-and-child with id {}",
-                a,
-                b
-            );
-            self.classes[a] = EClass::node_and_child(node, b);
-            self.update_analysis_union(ctx, a, a, b);
-            return a;
-        }
-
-        let u = self.classes.push(EClass::union(a, b));
-        self.unionfind.add(u);
-        self.unionfind.union(u, b);
-        trace!(" -> union id {} and id {} into id {}", a, b, u);
-        self.update_analysis_union(ctx, u, a, b);
-        u
-    }
-
-    /// Get the canonical ID for an eclass. This may be an older
-    /// generation, so will not be able to see all enodes in the
-    /// eclass; but it will allow us to unambiguously refer to an
-    /// eclass, even across merging.
-    pub fn canonical_id_mut(&mut self, eclass: Id) -> Id {
-        self.unionfind.find_and_update(eclass)
-    }
-
-    /// Get the canonical ID for an eclass. This may be an older
-    /// generation, so will not be able to see all enodes in the
-    /// eclass; but it will allow us to unambiguously refer to an
-    /// eclass, even across merging.
-    pub fn canonical_id(&self, eclass: Id) -> Id {
-        self.unionfind.find(eclass)
-    }
-
-    /// Get the enodes for a given eclass.
-    pub fn enodes(&self, eclass: Id) -> NodeIter<L, A> {
-        NodeIter {
-            stack: smallvec![eclass],
-            _phantom1: PhantomData,
-            _phantom2: PhantomData,
-        }
-    }
-
-    /// Update analysis for a given eclass node (new-enode case).
-    fn update_analysis_new(&mut self, ctx: &L, eclass: Id, node: NodeKey) {
-        if let Some((analysis, state)) = self.analysis.as_mut() {
-            let node = node.node(&self.nodes);
-            state[eclass] = analysis.for_node(ctx, node, state);
-        }
-    }
-
-    /// Update analysis for a given eclass node (union case).
-    fn update_analysis_union(&mut self, ctx: &L, eclass: Id, a: Id, b: Id) {
-        if let Some((analysis, state)) = self.analysis.as_mut() {
-            let a = &state[a];
-            let b = &state[b];
-            state[eclass] = analysis.meet(ctx, a, b);
-        }
-    }
-
-    /// Get the analysis value for a given eclass. Panics if no analysis is present.
-    pub fn analysis_value(&self, eclass: Id) -> &A::Value {
-        &self.analysis.as_ref().unwrap().1[eclass]
-    }
-}
-
-/// An iterator over all nodes in an eclass.
-///
-/// Because eclasses are immutable once created, this does *not* need
-/// to hold an open borrow on the egraph; it is free to add new nodes,
-/// while our existing Ids will remain valid.
-pub struct NodeIter<L: Language, A: Analysis<L = L>> {
-    stack: SmallVec<[Id; 8]>,
-    _phantom1: PhantomData<L>,
-    _phantom2: PhantomData<A>,
-}
-
-impl<L: Language, A: Analysis<L = L>> NodeIter<L, A> {
-    #[inline(always)]
-    pub fn next<'a>(&mut self, egraph: &'a EGraph<L, A>) -> Option<&'a L::Node> {
-        while let Some(next) = self.stack.pop() {
-            let eclass = egraph.classes[next];
-            if let Some(node) = eclass.as_node() {
-                return Some(&egraph.nodes[node.index as usize]);
-            } else if let Some((node, child)) = eclass.as_node_and_child() {
-                if child != Id::invalid() {
-                    self.stack.push(child);
-                }
-                return Some(&egraph.nodes[node.index as usize]);
-            } else if let Some((child1, child2)) = eclass.as_union() {
-                debug_assert!(child1 != Id::invalid());
-                debug_assert!(child2 != Id::invalid());
-                self.stack.push(child2);
-                self.stack.push(child1);
-                continue;
-            } else {
-                unreachable!("Invalid eclass format");
-            }
-        }
-        None
-    }
-}
diff --git a/cranelift/egraph/src/unionfind.rs b/cranelift/egraph/src/unionfind.rs
deleted file mode 100644
index dd90fc8c23..0000000000
--- a/cranelift/egraph/src/unionfind.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-//! Simple union-find data structure.
-
-use crate::{trace, Id};
-use cranelift_entity::SecondaryMap;
-use std::hash::{Hash, Hasher};
-
-/// A union-find data structure. The data structure can allocate
-/// `Id`s, indicating eclasses, and can merge eclasses together.
-#[derive(Clone, Debug)]
-pub struct UnionFind {
-    parent: SecondaryMap<Id, Id>,
-}
-
-impl UnionFind {
-    /// Create a new `UnionFind`.
-    pub fn new() -> Self {
-        UnionFind {
-            parent: SecondaryMap::new(),
-        }
-    }
-
-    /// Create a new `UnionFind` with the given capacity.
-    pub fn with_capacity(cap: usize) -> Self {
-        UnionFind {
-            parent: SecondaryMap::with_capacity(cap),
-        }
-    }
-
-    /// Add an `Id` to the `UnionFind`, with its own equivalence class
-    /// initially. All `Id`s must be added before being queried or
-    /// unioned.
-    pub fn add(&mut self, id: Id) {
-        self.parent[id] = id;
-    }
-
-    /// Find the canonical `Id` of a given `Id`.
-    pub fn find(&self, mut node: Id) -> Id {
-        while node != self.parent[node] {
-            node = self.parent[node];
-        }
-        node
-    }
-
-    /// Find the canonical `Id` of a given `Id`, updating the data
-    /// structure in the process so that future queries for this `Id`
-    /// (and others in its chain up to the root of the equivalence
-    /// class) will be faster.
-    pub fn find_and_update(&mut self, mut node: Id) -> Id {
-        // "Path splitting" mutating find (Tarjan and Van Leeuwen).
-        let orig = node;
-        while node != self.parent[node] {
-            let next = self.parent[self.parent[node]];
-            self.parent[node] = next;
-            node = next;
-        }
-        trace!("find_and_update: {} -> {}", orig, node);
-        node
-    }
-
-    /// Merge the equivalence classes of the two `Id`s.
-    pub fn union(&mut self, a: Id, b: Id) {
-        let a = self.find_and_update(a);
-        let b = self.find_and_update(b);
-        let (a, b) = (std::cmp::min(a, b), std::cmp::max(a, b));
-        if a != b {
-            // Always canonicalize toward lower IDs.
-            self.parent[b] = a;
-            trace!("union: {}, {}", a, b);
-        }
-    }
-
-    /// Determine if two `Id`s are equivalent, after
-    /// canonicalizing. Update union-find data structure during our
-    /// canonicalization to make future lookups faster.
-    pub fn equiv_id_mut(&mut self, a: Id, b: Id) -> bool {
-        self.find_and_update(a) == self.find_and_update(b)
-    }
-
-    /// Hash an `Id` after canonicalizing it. Update union-find data
-    /// structure to make future lookups/hashing faster.
-    pub fn hash_id_mut<H: Hasher>(&mut self, hash: &mut H, id: Id) {
-        let id = self.find_and_update(id);
-        id.hash(hash);
-    }
-}
diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif
index 348dbf7212..6eaa6fcda9 100644
--- a/cranelift/filetests/filetests/egraph/algebraic.clif
+++ b/cranelift/filetests/filetests/egraph/algebraic.clif
@@ -7,8 +7,8 @@ function %f0(i32) -> i32 {
 block0(v0: i32):
     v1 = iconst.i32 2
     v2 = imul v0, v1
-    ; check: v1 = iadd v0, v0
-    ; nextln: return v1
+    ; check: v3 = iadd v0, v0
+    ; check: return v3
     return v2
 }
 
@@ -17,6 +17,6 @@ block0:
   v0 = iconst.i32 0xffff_ffff_9876_5432
   v1 = uextend.i64 v0
   return v1
-  ; check: v0 = iconst.i64 0x9876_5432
-  ; nextln: return v0  ; v0 = 0x9876_5432
+  ; check: v2 = iconst.i64 0x9876_5432
+  ; check: return v2  ; v2 = 0x9876_5432
 }
diff --git a/cranelift/filetests/filetests/egraph/alias_analysis.clif b/cranelift/filetests/filetests/egraph/alias_analysis.clif
index 340455dfad..ce78431469 100644
--- a/cranelift/filetests/filetests/egraph/alias_analysis.clif
+++ b/cranelift/filetests/filetests/egraph/alias_analysis.clif
@@ -16,7 +16,7 @@ block0(v0: i64):
     return v7
 }
 
-; check: v1 = load.i64 heap v0
-; nextln: store v0, v1
-; nextln: v2 = load.i64 v0
-; nextln: return v2
+; check: v3 = load.i64 heap v0
+; check: store v0, v3
+; check: v7 = load.i64 v0
+; check: return v7
diff --git a/cranelift/filetests/filetests/egraph/basic-gvn.clif b/cranelift/filetests/filetests/egraph/basic-gvn.clif
index d8023f0ac9..7b38786228 100644
--- a/cranelift/filetests/filetests/egraph/basic-gvn.clif
+++ b/cranelift/filetests/filetests/egraph/basic-gvn.clif
@@ -21,9 +21,9 @@ block2(v6: i32):
 ;; Check that the `iadd` for `v4` is subsumed by `v2`:
 
 ; check: block0(v0: i32, v1: i32):
-; nextln:     v2 = iadd v0, v1
+; check:      v2 = iadd v0, v1
 ; check:  block1:
-; nextln:     v3 = iadd.i32 v2, v0
-; nextln:     return v3
+; check:      v5 = iadd.i32 v2, v0
+; nextln:     return v5
 ; check: block2:
 ; nextln:    return v1
diff --git a/cranelift/filetests/filetests/egraph/licm.clif b/cranelift/filetests/filetests/egraph/licm.clif
index 233763d9e6..a6f4585567 100644
--- a/cranelift/filetests/filetests/egraph/licm.clif
+++ b/cranelift/filetests/filetests/egraph/licm.clif
@@ -26,15 +26,15 @@ block2(v9: i32):
 
 ; check:  block1(v2: i32):
 ;; constants are not lifted; they are rematerialized in each block where used
-; nextln:     v3 = iconst.i32 40
-; nextln:     v4 = icmp eq v2, v3
-; nextln:     v5 = iconst.i32 1
-; nextln:     v6 = iadd v2, v5
-; nextln:     brnz v4, block2
-; nextln:     jump block1(v6)
+; check:      v5 = iconst.i32 40
+; check:      v6 = icmp eq v2, v5
+; check:      v3 = iconst.i32 1
+; check:      v8 = iadd v2, v3
+; check:      brnz v6, block2
+; check:      jump block1(v8)
 
 ; check:  block2:
-; nextln:     v7 = iconst.i32 1
-; nextln:     v8 = iadd.i32 v1, v7
-; nextln:     return v8
+; check:      v10 = iconst.i32 1
+; check:      v4 = iadd.i32 v1, v10
+; check:      return v4
 
diff --git a/cranelift/filetests/filetests/egraph/misc.clif b/cranelift/filetests/filetests/egraph/misc.clif
index 33b4c88197..668c643cd5 100644
--- a/cranelift/filetests/filetests/egraph/misc.clif
+++ b/cranelift/filetests/filetests/egraph/misc.clif
@@ -15,7 +15,7 @@ block0(v0: i64):
 ; check: function %stack_load(i64) -> i64 fast {
 ; nextln:    ss0 = explicit_slot 8
 ; check:  block0(v0: i64):
-; nextln:     v1 = stack_addr.i64 ss0
-; nextln:     store notrap aligned v0, v1
+; nextln:     v2 = stack_addr.i64 ss0
+; nextln:     store notrap aligned v0, v2
 ; nextln:     return v0
 ; nextln: }
diff --git a/cranelift/filetests/filetests/egraph/remat.clif b/cranelift/filetests/filetests/egraph/remat.clif
index 0df7db6141..69289b7cdf 100644
--- a/cranelift/filetests/filetests/egraph/remat.clif
+++ b/cranelift/filetests/filetests/egraph/remat.clif
@@ -20,16 +20,16 @@ block2:
 }
 
 ; check:  block0(v0: i32):
-; nextln:     v1 = iconst.i32 42
-; nextln:     v2 = iadd v0, v1
-; nextln:     brnz v2, block1
-; nextln:     jump block2
+; check:      v1 = iconst.i32 42
+; check:      v2 = iadd v0, v1
+; check:      brnz v2, block1
+; check:      jump block2
 ; check:   block1:
-; nextln:     v5 = iconst.i32 126
-; nextln:     v6 = iadd.i32 v0, v5
-; nextln:     return v6
+; check:      v11 = iconst.i32 126
+; check:      v13 = iadd.i32 v0, v11
+; check:      return v13
 ; check:   block2:
-; nextln:     v3 = iconst.i32 42
-; nextln:     v4 = iadd.i32 v0, v3
-; nextln:     return v4
+; check:      v15 = iconst.i32 42
+; check:      v16 = iadd.i32 v0, v15
+; check:      return v16
 
diff --git a/cranelift/preopt/src/constant_folding.rs b/cranelift/preopt/src/constant_folding.rs
index 1faf22e927..59432255cc 100644
--- a/cranelift/preopt/src/constant_folding.rs
+++ b/cranelift/preopt/src/constant_folding.rs
@@ -67,6 +67,7 @@ fn resolve_value_to_imm(dfg: &ir::DataFlowGraph, value: ir::Value) -> Option<Con
     let inst = match dfg.value_def(original) {
         ValueDef::Result(inst, _) => inst,
         ValueDef::Param(_, _) => return None,
+        ValueDef::Union(_, _) => return None,
     };
 
     use self::ir::{InstructionData::*, Opcode::*};