Merge pull request #3506 from fitzgen/isle

Initial ISLE integration for x64
This commit is contained in:
Nick Fitzgerald
2021-11-15 15:38:09 -08:00
committed by GitHub
56 changed files with 16206 additions and 1192 deletions

View File

@@ -39,6 +39,8 @@ criterion = "0.3"
[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.78.0" }
isle = { path = "../isle/isle", version = "0.78.0", optional = true }
miette = { version = "3", features = ["fancy"] }
[features]
default = ["std", "unwind"]
@@ -98,6 +100,9 @@ enable-peepmatic = ["peepmatic-runtime", "peepmatic-traits", "serde"]
# Enable support for the Souper harvester.
souper-harvest = ["souper-ir", "souper-ir/stringify"]
# Recompile ISLE DSL source files into their generated Rust code.
rebuild-isle = ["isle", "cranelift-codegen-meta/rebuild-isle"]
[badges]
maintenance = { status = "experimental" }

View File

@@ -46,9 +46,12 @@ fn main() {
isa_targets
};
let cur_dir = env::current_dir().expect("Can't access current working directory");
let crate_dir = cur_dir.as_path();
println!("cargo:rerun-if-changed=build.rs");
if let Err(err) = meta::generate(&isas, &out_dir) {
if let Err(err) = meta::generate(&isas, &out_dir, crate_dir) {
eprintln!("Error: {}", err);
process::exit(1);
}
@@ -74,6 +77,19 @@ fn main() {
.unwrap()
}
#[cfg(feature = "rebuild-isle")]
{
if let Err(e) = rebuild_isle(crate_dir) {
eprintln!("Error building ISLE files: {:?}", e);
let mut source = e.source();
while let Some(e) = source {
eprintln!("{:?}", e);
source = e.source();
}
std::process::abort();
}
}
let pkg_version = env::var("CARGO_PKG_VERSION").unwrap();
let mut cmd = std::process::Command::new("git");
cmd.arg("rev-parse")
@@ -110,3 +126,136 @@ fn main() {
)
.unwrap();
}
/// Rebuild ISLE DSL source text into generated Rust code.
///
/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since
/// it consumes files generated by them.
#[cfg(feature = "rebuild-isle")]
fn rebuild_isle(crate_dir: &std::path::Path) -> Result<(), Box<dyn std::error::Error + 'static>> {
    use std::sync::Once;

    // Install the `miette` error-rendering hook exactly once, even if this
    // function were ever called more than once.
    static SET_MIETTE_HOOK: Once = Once::new();
    SET_MIETTE_HOOK.call_once(|| {
        let _ = miette::set_hook(Box::new(|_| {
            Box::new(
                miette::MietteHandlerOpts::new()
                    // This is necessary for `miette` to properly display errors
                    // until https://github.com/zkat/miette/issues/93 is fixed.
                    .force_graphical(true)
                    .build(),
            )
        }));
    });

    let clif_isle = crate_dir.join("src").join("clif.isle");
    let prelude_isle = crate_dir.join("src").join("prelude.isle");
    let src_isa_x64 = crate_dir.join("src").join("isa").join("x64");

    // This is a set of ISLE compilation units.
    //
    // The format of each entry is:
    //
    //     (output Rust code file, input ISLE source files)
    //
    // There should be one entry for each backend that uses ISLE for lowering,
    // and if/when we replace our peephole optimization passes with ISLE, there
    // should be an entry for each of those as well.
    let isle_compilations = vec![
        // The x86-64 instruction selector.
        (
            src_isa_x64
                .join("lower")
                .join("isle")
                .join("generated_code.rs"),
            vec![
                clif_isle,
                prelude_isle,
                src_isa_x64.join("inst.isle"),
                src_isa_x64.join("lower.isle"),
            ],
        ),
    ];

    let cur_dir = std::env::current_dir()?;
    for (out_file, mut files) in isle_compilations {
        for file in files.iter_mut() {
            println!("cargo:rerun-if-changed={}", file.display());

            // Strip the current directory from the file paths, because `islec`
            // includes them in the generated source, and this helps us maintain
            // deterministic builds that don't include those local file paths.
            if let Ok(suffix) = file.strip_prefix(&cur_dir) {
                *file = suffix.to_path_buf();
            }
        }

        // Lex, parse, and compile the ISLE sources into Rust source text.
        let code = (|| {
            let lexer = isle::lexer::Lexer::from_files(files)?;
            let defs = isle::parser::parse(lexer)?;
            isle::compile::compile(&defs)
        })()
        .map_err(|e| {
            // Make sure to include the source snippets location info along with
            // the error messages.
            let report = miette::Report::new(e);
            return DebugReport(report);

            // Wrapper whose `Debug`/`Display` impls render the full `miette`
            // report (with source snippets) rather than the terse default.
            struct DebugReport(miette::Report);

            impl std::fmt::Display for DebugReport {
                fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                    self.0.handler().debug(&*self.0, f)
                }
            }

            impl std::fmt::Debug for DebugReport {
                fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                    std::fmt::Display::fmt(self, f)
                }
            }

            impl std::error::Error for DebugReport {}
        })?;

        // Formatting is best-effort: if `rustfmt` is missing or fails, emit a
        // build warning and fall back to the unformatted generated code.
        let code = rustfmt(&code).unwrap_or_else(|e| {
            println!(
                "cargo:warning=Failed to run `rustfmt` on ISLE-generated code: {:?}",
                e
            );
            code
        });

        println!("Writing ISLE-generated Rust code to {}", out_file.display());
        std::fs::write(out_file, code)?;
    }

    return Ok(());

    /// Pipe `code` through an external `rustfmt` process and return the
    /// formatted result.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if `rustfmt` cannot be spawned, if piping to or
    /// from it fails, or if it exits with a non-success status.
    fn rustfmt(code: &str) -> std::io::Result<String> {
        // `Read` is required for `read_to_end`; `Write` for `write_all`.
        // (The original imported only `Write`, which leaves `read_to_end`
        // unresolved.)
        use std::io::{Read, Write};

        let mut rustfmt = std::process::Command::new("rustfmt")
            .stdin(std::process::Stdio::piped())
            .stdout(std::process::Stdio::piped())
            .spawn()?;

        let mut stdin = rustfmt.stdin.take().unwrap();
        stdin.write_all(code.as_bytes())?;
        // Close stdin so `rustfmt` sees EOF and can finish.
        drop(stdin);

        let mut stdout = rustfmt.stdout.take().unwrap();
        let mut data = vec![];
        stdout.read_to_end(&mut data)?;

        let status = rustfmt.wait()?;
        if !status.success() {
            return Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                format!("`rustfmt` exited with status {}", status),
            ));
        }

        Ok(String::from_utf8(data).expect("rustfmt always writes utf-8 to stdout"))
    }
}

View File

@@ -17,3 +17,6 @@ cranelift-codegen-shared = { path = "../shared", version = "0.78.0" }
[badges]
maintenance = { status = "experimental" }
[features]
rebuild-isle = []

View File

@@ -1,5 +1,6 @@
//! Generate instruction data (including opcodes, formats, builders, etc.).
use std::fmt;
use std::path::Path;
use cranelift_codegen_shared::constant_hash;
@@ -1084,6 +1085,243 @@ fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Fo
fmtln!(fmt, "}")
}
/// Emit the ISLE extern declarations for Cranelift's `Opcode`,
/// `InstructionData`, immediate types, value arrays, and per-instruction
/// extractors into `fmt` (this text becomes the generated `clif.isle` file).
///
/// `formats` is every instruction format; `instructions` is every clif
/// instruction; `fmt` accumulates the generated ISLE source text.
#[cfg(feature = "rebuild-isle")]
fn gen_isle(formats: &[&InstructionFormat], instructions: &AllInstructions, fmt: &mut Formatter) {
    use std::collections::BTreeSet;
    use std::fmt::Write;

    // Header comment for the generated ISLE file.
    fmt.multi_line(
        r#"
;; GENERATED BY `gen_isle`. DO NOT EDIT!!!
;;
;; This ISLE file defines all the external type declarations for Cranelift's
;; data structures that ISLE will process, such as `InstructionData` and
;; `Opcode`.
"#,
    );
    fmt.empty_line();

    // Generate all the extern type declarations we need for various immediates.
    fmt.line(";;;; Extern type declarations for immediates ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    // `BTreeSet` both deduplicates immediate types shared across formats and
    // keeps the emitted order deterministic.
    let imm_tys: BTreeSet<_> = formats
        .iter()
        .flat_map(|f| {
            f.imm_fields
                .iter()
                // Use only the last path segment (e.g. `ir::immediates::Imm64`
                // becomes `Imm64`) as the ISLE type name.
                .map(|i| i.kind.rust_type.rsplit("::").next().unwrap())
                .collect::<Vec<_>>()
        })
        .collect();
    for ty in imm_tys {
        fmtln!(fmt, "(type {} (primitive {}))", ty, ty);
    }
    fmt.empty_line();

    // Generate all of the value arrays we need for `InstructionData` as well as
    // the constructors and extractors for them.
    fmt.line(";;;; Value Arrays ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    // Only formats with a fixed operand count other than one (and no value
    // list) are represented as `[Value; N]` arrays.
    let value_array_arities: BTreeSet<_> = formats
        .iter()
        .filter(|f| f.typevar_operand.is_some() && !f.has_value_list && f.num_value_operands != 1)
        .map(|f| f.num_value_operands)
        .collect();
    for n in value_array_arities {
        fmtln!(fmt, ";; ISLE representation of `[Value; {}]`.", n);
        fmtln!(fmt, "(type ValueArray{} extern (enum))", n);
        fmt.empty_line();
        fmtln!(
            fmt,
            "(decl value_array_{} ({}) ValueArray{})",
            n,
            (0..n).map(|_| "Value").collect::<Vec<_>>().join(" "),
            n
        );
        fmtln!(
            fmt,
            "(extern constructor value_array_{} pack_value_array_{})",
            n,
            n
        );
        fmtln!(
            fmt,
            "(extern extractor infallible value_array_{} unpack_value_array_{})",
            n,
            n
        );
        fmt.empty_line();
    }

    // Generate the extern type declaration for `Opcode`.
    fmt.line(";;;; `Opcode` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    fmt.line("(type Opcode extern");
    fmt.indent(|fmt| {
        fmt.line("(enum");
        fmt.indent(|fmt| {
            for inst in instructions {
                fmtln!(fmt, "{}", inst.camel_name);
            }
        });
        fmt.line(")");
    });
    fmt.line(")");
    fmt.empty_line();

    // Generate the extern type declaration for `InstructionData`.
    fmt.line(";;;; `InstructionData` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
    fmt.empty_line();
    fmt.line("(type InstructionData extern");
    fmt.indent(|fmt| {
        fmt.line("(enum");
        fmt.indent(|fmt| {
            for format in formats {
                // Each variant mirrors the Rust `InstructionData` variant:
                // `(FormatName (opcode Opcode) <values> <immediates>)`.
                let mut s = format!("({} (opcode Opcode)", format.name);
                if format.typevar_operand.is_some() {
                    if format.has_value_list {
                        s.push_str(" (args ValueList)");
                    } else if format.num_value_operands == 1 {
                        s.push_str(" (arg Value)");
                    } else {
                        write!(&mut s, " (args ValueArray{})", format.num_value_operands).unwrap();
                    }
                }
                for field in &format.imm_fields {
                    write!(
                        &mut s,
                        " ({} {})",
                        field.member,
                        field.kind.rust_type.rsplit("::").next().unwrap()
                    )
                    .unwrap();
                }
                s.push(')');
                fmt.line(&s);
            }
        });
        fmt.line(")");
    });
    fmt.line(")");
    fmt.empty_line();

    // Generate the helper extractors for each opcode's full instruction.
    //
    // TODO: if/when we port our peephole optimization passes to ISLE we will
    // want helper constructors as well.
    fmt.line(";;;; Extracting Opcode, Operands, and Immediates from `InstructionData` ;;;;;;;;");
    fmt.empty_line();
    for inst in instructions {
        // Declaration: one extractor per instruction, taking the instruction's
        // operand types and matching an `Inst`.
        fmtln!(
            fmt,
            "(decl {} ({}) Inst)",
            inst.name,
            inst.operands_in
                .iter()
                .map(|o| {
                    let ty = o.kind.rust_type;
                    // Varargs operands are exposed to ISLE as `ValueSlice`.
                    if ty == "&[Value]" {
                        "ValueSlice"
                    } else {
                        ty.rsplit("::").next().unwrap()
                    }
                })
                .collect::<Vec<_>>()
                .join(" ")
        );
        fmtln!(fmt, "(extractor");
        fmt.indent(|fmt| {
            fmtln!(
                fmt,
                "({} {})",
                inst.name,
                inst.operands_in
                    .iter()
                    .map(|o| { o.name })
                    .collect::<Vec<_>>()
                    .join(" ")
            );
            let mut s = format!(
                "(inst_data (InstructionData.{} (Opcode.{})",
                inst.format.name, inst.camel_name
            );

            // Immediates.
            let imm_operands: Vec<_> = inst
                .operands_in
                .iter()
                .filter(|o| !o.is_value() && !o.is_varargs())
                .collect();
            // Sanity check: the instruction's non-value operands must match
            // its format's immediate fields one-to-one.
            assert_eq!(imm_operands.len(), inst.format.imm_fields.len());
            for op in imm_operands {
                write!(&mut s, " {}", op.name).unwrap();
            }

            // Value and varargs operands.
            if inst.format.typevar_operand.is_some() {
                if inst.format.has_value_list {
                    // The instruction format uses a value list, but the
                    // instruction itself might have not only a `&[Value]`
                    // varargs operand, but also one or more `Value` operands as
                    // well. If this is the case, then we need to read them off
                    // the front of the `ValueList`.
                    let values: Vec<_> = inst
                        .operands_in
                        .iter()
                        .filter(|o| o.is_value())
                        .map(|o| o.name)
                        .collect();
                    let varargs = inst
                        .operands_in
                        .iter()
                        .find(|o| o.is_varargs())
                        .unwrap()
                        .name;
                    if values.is_empty() {
                        write!(&mut s, " (value_list_slice {})", varargs).unwrap();
                    } else {
                        write!(
                            &mut s,
                            " (unwrap_head_value_list_{} {} {})",
                            values.len(),
                            values.join(" "),
                            varargs
                        )
                        .unwrap();
                    }
                } else if inst.format.num_value_operands == 1 {
                    // Single value operand: bound directly to the format's
                    // `(arg Value)` slot.
                    write!(
                        &mut s,
                        " {}",
                        inst.operands_in.iter().find(|o| o.is_value()).unwrap().name
                    )
                    .unwrap();
                } else {
                    // Fixed arity != 1: unpack through the matching
                    // `value_array_N` extractor.
                    let values = inst
                        .operands_in
                        .iter()
                        .filter(|o| o.is_value())
                        .map(|o| o.name)
                        .collect::<Vec<_>>();
                    assert_eq!(values.len(), inst.format.num_value_operands);
                    let values = values.join(" ");
                    write!(
                        &mut s,
                        " (value_array_{} {})",
                        inst.format.num_value_operands, values,
                    )
                    .unwrap();
                }
            }

            s.push_str("))");
            fmt.line(&s);
        });
        fmt.line(")");
        fmt.empty_line();
    }
}
/// Generate a Builder trait with methods for all instructions.
fn gen_builder(
instructions: &AllInstructions,
@@ -1128,7 +1366,9 @@ pub(crate) fn generate(
all_inst: &AllInstructions,
opcode_filename: &str,
inst_builder_filename: &str,
isle_filename: &str,
out_dir: &str,
crate_dir: &Path,
) -> Result<(), error::Error> {
// Opcodes.
let mut fmt = Formatter::new();
@@ -1144,6 +1384,20 @@ pub(crate) fn generate(
gen_try_from(all_inst, &mut fmt);
fmt.update_file(opcode_filename, out_dir)?;
// ISLE DSL.
#[cfg(feature = "rebuild-isle")]
{
let mut fmt = Formatter::new();
gen_isle(&formats, all_inst, &mut fmt);
let crate_src_dir = crate_dir.join("src");
fmt.update_file(isle_filename, &crate_src_dir.display().to_string())?;
}
#[cfg(not(feature = "rebuild-isle"))]
{
// Silence unused variable warnings.
let _ = (isle_filename, crate_dir);
}
// Instruction builder.
let mut fmt = Formatter::new();
gen_builder(all_inst, &formats, &mut fmt);

View File

@@ -1,5 +1,7 @@
//! This crate generates Rust sources for use by
//! [`cranelift_codegen`](../cranelift_codegen/index.html).
use std::path::Path;
#[macro_use]
mod cdsl;
mod srcgen;
@@ -21,7 +23,7 @@ pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
}
/// Generates all the Rust source files used in Cranelift from the meta-language.
pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
pub fn generate(isas: &[isa::Isa], out_dir: &str, crate_dir: &Path) -> Result<(), error::Error> {
// Create all the definitions:
// - common definitions.
let mut shared_defs = shared::define();
@@ -46,7 +48,9 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
&shared_defs.all_instructions,
"opcodes.rs",
"inst_builder.rs",
"clif.isle",
&out_dir,
crate_dir,
)?;
for isa in target_isas {

View File

@@ -100,6 +100,7 @@ impl Formatter {
let path_str = format!("{}/{}", directory, filename.as_ref());
let path = path::Path::new(&path_str);
println!("Writing generated file: {}", path.display());
let mut f = fs::File::create(path)?;
for l in self.lines.iter().map(|l| l.as_bytes()) {

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,977 @@
;; Extern type definitions and constructors for the x64 `MachInst` type.
;;;; `MInst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type MInst extern
(enum (Nop (len u8))
(AluRmiR (size OperandSize)
(op AluRmiROpcode)
(src1 Reg)
(src2 RegMemImm)
(dst WritableReg))
(MulHi (size OperandSize)
(signed bool)
(src1 Reg)
(src2 RegMem)
(dst_lo WritableReg)
(dst_hi WritableReg))
(XmmRmR (op SseOpcode)
(src1 Reg)
(src2 RegMem)
(dst WritableReg))
(XmmUnaryRmR (op SseOpcode)
(src RegMem)
(dst WritableReg))
(XmmRmiReg (opcode SseOpcode)
(src1 Reg)
(src2 RegMemImm)
(dst WritableReg))
(XmmRmRImm (op SseOpcode)
(src1 Reg)
(src2 RegMem)
(dst WritableReg)
(imm u8)
(size OperandSize))
(CmpRmiR (size OperandSize)
(opcode CmpOpcode)
(src RegMemImm)
(dst Reg))
(Imm (dst_size OperandSize)
(simm64 u64)
(dst WritableReg))
(ShiftR (size OperandSize)
(kind ShiftKind)
(src Reg)
(num_bits Imm8Reg)
(dst WritableReg))
(MovzxRmR (ext_mode ExtMode)
(src RegMem)
(dst WritableReg))
(MovsxRmR (ext_mode ExtMode)
(src RegMem)
(dst WritableReg))
(Cmove (size OperandSize)
(cc CC)
(consequent RegMem)
(alternative Reg)
(dst WritableReg))
(XmmRmREvex (op Avx512Opcode)
(src1 RegMem)
(src2 Reg)
(dst WritableReg))))
(type OperandSize extern
(enum Size8
Size16
Size32
Size64))
;; Get the `OperandSize` for a given `Type`.
(decl operand_size_of_type (Type) OperandSize)
(extern constructor operand_size_of_type operand_size_of_type)
;; Get the bit width of an `OperandSize`.
(decl operand_size_bits (OperandSize) u16)
(rule (operand_size_bits (OperandSize.Size8)) 8)
(rule (operand_size_bits (OperandSize.Size16)) 16)
(rule (operand_size_bits (OperandSize.Size32)) 32)
(rule (operand_size_bits (OperandSize.Size64)) 64)
(type AluRmiROpcode extern
(enum Add
Adc
Sub
Sbb
And
Or
Xor
Mul
And8
Or8))
(type SseOpcode extern
(enum Addps
Addpd
Addss
Addsd
Andps
Andpd
Andnps
Andnpd
Blendvpd
Blendvps
Comiss
Comisd
Cmpps
Cmppd
Cmpss
Cmpsd
Cvtdq2ps
Cvtdq2pd
Cvtpd2ps
Cvtps2pd
Cvtsd2ss
Cvtsd2si
Cvtsi2ss
Cvtsi2sd
Cvtss2si
Cvtss2sd
Cvttpd2dq
Cvttps2dq
Cvttss2si
Cvttsd2si
Divps
Divpd
Divss
Divsd
Insertps
Maxps
Maxpd
Maxss
Maxsd
Minps
Minpd
Minss
Minsd
Movaps
Movapd
Movd
Movdqa
Movdqu
Movlhps
Movmskps
Movmskpd
Movq
Movss
Movsd
Movups
Movupd
Mulps
Mulpd
Mulss
Mulsd
Orps
Orpd
Pabsb
Pabsw
Pabsd
Packssdw
Packsswb
Packusdw
Packuswb
Paddb
Paddd
Paddq
Paddw
Paddsb
Paddsw
Paddusb
Paddusw
Palignr
Pand
Pandn
Pavgb
Pavgw
Pblendvb
Pcmpeqb
Pcmpeqw
Pcmpeqd
Pcmpeqq
Pcmpgtb
Pcmpgtw
Pcmpgtd
Pcmpgtq
Pextrb
Pextrw
Pextrd
Pinsrb
Pinsrw
Pinsrd
Pmaddubsw
Pmaddwd
Pmaxsb
Pmaxsw
Pmaxsd
Pmaxub
Pmaxuw
Pmaxud
Pminsb
Pminsw
Pminsd
Pminub
Pminuw
Pminud
Pmovmskb
Pmovsxbd
Pmovsxbw
Pmovsxbq
Pmovsxwd
Pmovsxwq
Pmovsxdq
Pmovzxbd
Pmovzxbw
Pmovzxbq
Pmovzxwd
Pmovzxwq
Pmovzxdq
Pmuldq
Pmulhw
Pmulhuw
Pmulhrsw
Pmulld
Pmullw
Pmuludq
Por
Pshufb
Pshufd
Psllw
Pslld
Psllq
Psraw
Psrad
Psrlw
Psrld
Psrlq
Psubb
Psubd
Psubq
Psubw
Psubsb
Psubsw
Psubusb
Psubusw
Ptest
Punpckhbw
Punpckhwd
Punpcklbw
Punpcklwd
Pxor
Rcpss
Roundps
Roundpd
Roundss
Roundsd
Rsqrtss
Shufps
Sqrtps
Sqrtpd
Sqrtss
Sqrtsd
Subps
Subpd
Subss
Subsd
Ucomiss
Ucomisd
Unpcklps
Xorps
Xorpd))
(type CmpOpcode extern
(enum Cmp
Test))
(type RegMemImm extern
(enum
(Reg (reg Reg))
(Mem (addr SyntheticAmode))
(Imm (simm32 u32))))
(type RegMem extern
(enum
(Reg (reg Reg))
(Mem (addr SyntheticAmode))))
;; Put the given clif value into a `RegMem` operand.
;;
;; Asserts that the value fits into a single register, and doesn't require
;; multiple registers for its representation (like `i128` for example).
;;
;; As a side effect, this marks the value as used.
(decl put_in_reg_mem (Value) RegMem)
(extern constructor put_in_reg_mem put_in_reg_mem)
(type SyntheticAmode extern (enum))
(type ShiftKind extern
(enum ShiftLeft
ShiftRightLogical
ShiftRightArithmetic
RotateLeft
RotateRight))
(type Imm8Reg extern
(enum (Imm8 (imm u8))
(Reg (reg Reg))))
(type CC extern
(enum O
NO
B
NB
Z
NZ
BE
NBE
S
NS
L
NL
LE
NLE
P
NP))
(type Avx512Opcode extern
(enum Vcvtudq2ps
Vpabsq
Vpermi2b
Vpmullq
Vpopcntb))
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl avx512vl_enabled () Type)
(extern extractor avx512vl_enabled avx512vl_enabled)
(decl avx512dq_enabled () Type)
(extern extractor avx512dq_enabled avx512dq_enabled)
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
(decl imm8_from_value (Imm8Reg) Value)
(extern extractor imm8_from_value imm8_from_value)
;; Extract a constant `RegMemImm.Imm` from a value operand.
(decl simm32_from_value (RegMemImm) Value)
(extern extractor simm32_from_value simm32_from_value)
;; Extract a constant `RegMemImm.Imm` from an `Imm64` immediate.
(decl simm32_from_imm64 (RegMemImm) Imm64)
(extern extractor simm32_from_imm64 simm32_from_imm64)
;; A load that can be sunk into another operation.
(type SinkableLoad extern (enum))

;; Extract a `SinkableLoad` that works with `RegMemImm.Mem` from a value
;; operand.
(decl sinkable_load (SinkableLoad) Value)
(extern extractor sinkable_load sinkable_load)

;; Sink a `SinkableLoad` into a `RegMemImm.Mem`.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction that produced the `SinkableLoad` has been sunk into another
;; instruction, and no longer needs to be lowered.
(decl sink_load (SinkableLoad) RegMemImm)
(extern constructor sink_load sink_load)
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Newtype wrapper around `MInst` for instructions that are used for their
;; effect on flags.
(type ProducesFlags (enum (ProducesFlags (inst MInst) (result Reg))))
;; Newtype wrapper around `MInst` for instructions that consume flags.
(type ConsumesFlags (enum (ConsumesFlags (inst MInst) (result Reg))))
;; Combine flags-producing and -consuming instructions together, ensuring that
;; they are emitted back-to-back and no other instructions can be emitted
;; between them and potentially clobber the flags.
;;
;; Returns a `ValueRegs` where the first register is the result of the
;; `ProducesFlags` instruction and the second is the result of the
;; `ConsumesFlags` instruction.
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
(rule (with_flags (ProducesFlags.ProducesFlags producer_inst producer_result)
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst)))
(value_regs producer_result consumer_result)))
;; Like `with_flags` but returns only the result of the consumer operation.
(decl with_flags_1 (ProducesFlags ConsumesFlags) Reg)
(rule (with_flags_1 (ProducesFlags.ProducesFlags producer_inst _producer_result)
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst)))
consumer_result))
;; Like `with_flags` but allows two consumers of the same flags. The result is a
;; `ValueRegs` containing the first consumer's result and then the second
;; consumer's result.
(decl with_flags_2 (ProducesFlags ConsumesFlags ConsumesFlags) ValueRegs)
(rule (with_flags_2 (ProducesFlags.ProducesFlags producer_inst producer_result)
(ConsumesFlags.ConsumesFlags consumer_inst_1 consumer_result_1)
(ConsumesFlags.ConsumesFlags consumer_inst_2 consumer_result_2))
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst_1))
(_z Unit (emit consumer_inst_2)))
(value_regs consumer_result_1 consumer_result_2)))
;;;; Helpers for Sign/Zero Extending ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type ExtendKind (enum Sign Zero))
(type ExtMode extern (enum BL BQ WL WQ LQ))
;; `ExtMode::new`
(decl ext_mode (u16 u16) ExtMode)
(extern constructor ext_mode ext_mode)
;; Put the given value into a register, but extended as the given type.
(decl extend_to_reg (Value Type ExtendKind) Reg)

;; If the value is already of the requested type, no extending is necessary.
(rule (extend_to_reg (and val (value_type ty)) =ty _kind)
      (put_in_reg val))

;; Otherwise, extend (per `kind`) from the value's type up to the target type.
(rule (extend_to_reg (and val (value_type from_ty))
                     to_ty
                     kind)
      (let ((from_bits u16 (ty_bits from_ty))
            ;; Use `operand_size_of_type` so that we clamp the output to 32-
            ;; or 64-bit width types.
            (to_bits u16 (operand_size_bits (operand_size_of_type to_ty))))
        (extend kind
                to_ty
                (ext_mode from_bits to_bits)
                (put_in_reg_mem val))))
;; Do a sign or zero extension of the given `RegMem`.
(decl extend (ExtendKind Type ExtMode RegMem) Reg)
;; Zero extending uses `movzx`.
(rule (extend (ExtendKind.Zero) ty mode src)
(movzx ty mode src))
;; Sign extending uses `movsx`.
(rule (extend (ExtendKind.Sign) ty mode src)
(movsx ty mode src))
;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; These constructors create SSA-style `MInst`s. It is their responsibility to
;; maintain the invariant that each temporary register they allocate and define
;; only gets defined the once.
;; Emit an instruction.
;;
;; This is low-level and side-effectful; it should only be used as an
;; implementation detail by helpers that preserve the SSA facade themselves.
(decl emit (MInst) Unit)
(extern constructor emit emit)
;; Helper for emitting `MInst.AluRmiR` instructions.
(decl alu_rmi_r (Type AluRmiROpcode Reg RegMemImm) Reg)
(rule (alu_rmi_r ty opcode src1 src2)
(let ((dst WritableReg (temp_writable_reg ty))
(size OperandSize (operand_size_of_type ty))
(_ Unit (emit (MInst.AluRmiR size opcode src1 src2 dst))))
(writable_reg_to_reg dst)))
;; Helper for emitting `add` instructions.
(decl add (Type Reg RegMemImm) Reg)
(rule (add ty src1 src2)
(alu_rmi_r ty
(AluRmiROpcode.Add)
src1
src2))
;; Helper for creating `add` instructions whose flags are also used.
(decl add_with_flags (Type Reg RegMemImm) ProducesFlags)
(rule (add_with_flags ty src1 src2)
(let ((dst WritableReg (temp_writable_reg ty)))
(ProducesFlags.ProducesFlags (MInst.AluRmiR (operand_size_of_type ty)
(AluRmiROpcode.Add)
src1
src2
dst)
(writable_reg_to_reg dst))))
;; Helper for creating `adc` instructions.
(decl adc (Type Reg RegMemImm) ConsumesFlags)
(rule (adc ty src1 src2)
(let ((dst WritableReg (temp_writable_reg ty)))
(ConsumesFlags.ConsumesFlags (MInst.AluRmiR (operand_size_of_type ty)
(AluRmiROpcode.Adc)
src1
src2
dst)
(writable_reg_to_reg dst))))
;; Helper for emitting `sub` instructions.
(decl sub (Type Reg RegMemImm) Reg)
(rule (sub ty src1 src2)
(alu_rmi_r ty
(AluRmiROpcode.Sub)
src1
src2))
;; Helper for creating `sub` instructions whose flags are also used.
(decl sub_with_flags (Type Reg RegMemImm) ProducesFlags)
(rule (sub_with_flags ty src1 src2)
(let ((dst WritableReg (temp_writable_reg ty)))
(ProducesFlags.ProducesFlags (MInst.AluRmiR (operand_size_of_type ty)
(AluRmiROpcode.Sub)
src1
src2
dst)
(writable_reg_to_reg dst))))
;; Helper for creating `sbb` instructions.
(decl sbb (Type Reg RegMemImm) ConsumesFlags)
(rule (sbb ty src1 src2)
(let ((dst WritableReg (temp_writable_reg ty)))
(ConsumesFlags.ConsumesFlags (MInst.AluRmiR (operand_size_of_type ty)
(AluRmiROpcode.Sbb)
src1
src2
dst)
(writable_reg_to_reg dst))))
;; Helper for creating `mul` instructions.
(decl mul (Type Reg RegMemImm) Reg)
(rule (mul ty src1 src2)
(alu_rmi_r ty
(AluRmiROpcode.Mul)
src1
src2))
;; Helper for emitting `and` instructions.
;;
;; Use `m_` prefix (short for "mach inst") to disambiguate with the ISLE-builtin
;; `and` operator.
(decl m_and (Type Reg RegMemImm) Reg)
(rule (m_and ty src1 src2)
(alu_rmi_r ty
(AluRmiROpcode.And)
src1
src2))
;; Helper for emitting `or` instructions.
(decl or (Type Reg RegMemImm) Reg)
(rule (or ty src1 src2)
(alu_rmi_r ty
(AluRmiROpcode.Or)
src1
src2))
;; Helper for emitting `xor` instructions.
(decl xor (Type Reg RegMemImm) Reg)
(rule (xor ty src1 src2)
(alu_rmi_r ty
(AluRmiROpcode.Xor)
src1
src2))
;; Helper for emitting immediates.
(decl imm (Type u64) Reg)
(rule (imm ty simm64)
(let ((dst WritableReg (temp_writable_reg ty))
(size OperandSize (operand_size_of_type ty))
(_ Unit (emit (MInst.Imm size simm64 dst))))
(writable_reg_to_reg dst)))
(decl nonzero_u64_fits_in_u32 (u64) u64)
(extern extractor nonzero_u64_fits_in_u32 nonzero_u64_fits_in_u32)
;; Special case for when a 64-bit immediate fits into 32-bits. We can use a
;; 32-bit move that zero-extends the value, which has a smaller encoding.
(rule (imm $I64 (nonzero_u64_fits_in_u32 x))
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Imm (OperandSize.Size32) x dst))))
(writable_reg_to_reg dst)))
;; Special case for zero immediates: turn them into an `xor r, r`.
(rule (imm ty 0)
(let ((wr WritableReg (temp_writable_reg ty))
(r Reg (writable_reg_to_reg wr))
(size OperandSize (operand_size_of_type ty))
(_ Unit (emit (MInst.AluRmiR size
(AluRmiROpcode.Xor)
r
(RegMemImm.Reg r)
wr))))
r))
;; Helper for creating `MInst.ShifR` instructions.
(decl shift_r (Type ShiftKind Reg Imm8Reg) Reg)
(rule (shift_r ty kind src1 src2)
(let ((dst WritableReg (temp_writable_reg ty))
(size OperandSize (operand_size_of_type ty))
(_ Unit (emit (MInst.ShiftR size kind src1 src2 dst))))
(writable_reg_to_reg dst)))
;; Helper for creating `rotl` instructions (prefixed with "m_", short for "mach
;; inst", to disambiguate this from clif's `rotl`).
(decl m_rotl (Type Reg Imm8Reg) Reg)
(rule (m_rotl ty src1 src2)
(shift_r ty (ShiftKind.RotateLeft) src1 src2))
;; Helper for creating `shl` instructions.
(decl shl (Type Reg Imm8Reg) Reg)
(rule (shl ty src1 src2)
(shift_r ty (ShiftKind.ShiftLeft) src1 src2))
;; Helper for creating logical shift-right instructions.
(decl shr (Type Reg Imm8Reg) Reg)
(rule (shr ty src1 src2)
(shift_r ty (ShiftKind.ShiftRightLogical) src1 src2))
;; Helper for creating arithmetic shift-right instructions.
(decl sar (Type Reg Imm8Reg) Reg)
(rule (sar ty src1 src2)
(shift_r ty (ShiftKind.ShiftRightArithmetic) src1 src2))
;; Helper for creating `MInst.CmpRmiR` instructions.
(decl cmp_rmi_r (OperandSize CmpOpcode RegMemImm Reg) ProducesFlags)
(rule (cmp_rmi_r size opcode src1 src2)
(ProducesFlags.ProducesFlags (MInst.CmpRmiR size
opcode
src1
src2)
(invalid_reg)))
;; Helper for creating `cmp` instructions.
(decl cmp (OperandSize RegMemImm Reg) ProducesFlags)
(rule (cmp size src1 src2)
(cmp_rmi_r size (CmpOpcode.Cmp) src1 src2))
;; Helper for creating `test` instructions.
(decl test (OperandSize RegMemImm Reg) ProducesFlags)
(rule (test size src1 src2)
(cmp_rmi_r size (CmpOpcode.Test) src1 src2))
;; Helper for creating `MInst.Cmove` instructions.
(decl cmove (Type CC RegMem Reg) ConsumesFlags)
(rule (cmove ty cc consequent alternative)
(let ((dst WritableReg (temp_writable_reg ty))
(size OperandSize (operand_size_of_type ty)))
(ConsumesFlags.ConsumesFlags (MInst.Cmove size cc consequent alternative dst)
(writable_reg_to_reg dst))))
;; Helper for creating `MInst.MovzxRmR` instructions.
(decl movzx (Type ExtMode RegMem) Reg)
(rule (movzx ty mode src)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.MovzxRmR mode src dst))))
(writable_reg_to_reg dst)))
;; Helper for creating `MInst.MovsxRmR` instructions.
(decl movsx (Type ExtMode RegMem) Reg)
(rule (movsx ty mode src)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.MovsxRmR mode src dst))))
(writable_reg_to_reg dst)))
;; Helper for creating `MInst.XmmRmR` instructions.
(decl xmm_rm_r (Type SseOpcode Reg RegMem) Reg)
(rule (xmm_rm_r ty op src1 src2)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.XmmRmR op src1 src2 dst))))
(writable_reg_to_reg dst)))
;; Helper for creating `paddb` instructions.
(decl paddb (Reg RegMem) Reg)
(rule (paddb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Paddb) src1 src2))
;; Helper for creating `paddw` instructions.
(decl paddw (Reg RegMem) Reg)
(rule (paddw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Paddw) src1 src2))
;; Helper for creating `paddd` instructions.
(decl paddd (Reg RegMem) Reg)
(rule (paddd src1 src2)
(xmm_rm_r $I32X4 (SseOpcode.Paddd) src1 src2))
;; Helper for creating `paddq` instructions.
(decl paddq (Reg RegMem) Reg)
(rule (paddq src1 src2)
(xmm_rm_r $I64X2 (SseOpcode.Paddq) src1 src2))
;; Helper for creating `paddsb` instructions.
(decl paddsb (Reg RegMem) Reg)
(rule (paddsb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Paddsb) src1 src2))
;; Helper for creating `paddsw` instructions.
(decl paddsw (Reg RegMem) Reg)
(rule (paddsw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Paddsw) src1 src2))
;; Helper for creating `paddusb` instructions.
(decl paddusb (Reg RegMem) Reg)
(rule (paddusb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Paddusb) src1 src2))
;; Helper for creating `paddusw` instructions.
(decl paddusw (Reg RegMem) Reg)
(rule (paddusw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Paddusw) src1 src2))
;; Helper for creating `psubb` instructions.
(decl psubb (Reg RegMem) Reg)
(rule (psubb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Psubb) src1 src2))
;; Helper for creating `psubw` instructions.
(decl psubw (Reg RegMem) Reg)
(rule (psubw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Psubw) src1 src2))
;; Helper for creating `psubd` instructions.
(decl psubd (Reg RegMem) Reg)
(rule (psubd src1 src2)
(xmm_rm_r $I32X4 (SseOpcode.Psubd) src1 src2))
;; Helper for creating `psubq` instructions.
(decl psubq (Reg RegMem) Reg)
(rule (psubq src1 src2)
(xmm_rm_r $I64X2 (SseOpcode.Psubq) src1 src2))
;; Helper for creating `psubsb` instructions.
(decl psubsb (Reg RegMem) Reg)
(rule (psubsb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Psubsb) src1 src2))
;; Helper for creating `psubsw` instructions.
(decl psubsw (Reg RegMem) Reg)
(rule (psubsw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Psubsw) src1 src2))
;; Helper for creating `psubusb` instructions.
(decl psubusb (Reg RegMem) Reg)
(rule (psubusb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Psubusb) src1 src2))
;; Helper for creating `psubusw` instructions.
(decl psubusw (Reg RegMem) Reg)
(rule (psubusw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Psubusw) src1 src2))
;; Helper for creating `pavgb` instructions.
(decl pavgb (Reg RegMem) Reg)
(rule (pavgb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pavgb) src1 src2))
;; Helper for creating `pavgw` instructions.
(decl pavgw (Reg RegMem) Reg)
(rule (pavgw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Pavgw) src1 src2))
;; Bitwise-logic helpers: first operand in a register, second as a
;; register-or-memory operand; the result register is returned.
;;
;; NOTE(review): the vector `Type` handed to `xmm_rm_r` varies (`$F32X4` even
;; for the integer ops `pand`/`por`, `$I8X16` for `pxor`); it appears only to
;; determine the temporary destination register's type -- confirm against the
;; `xmm_rm_r` definition.
;; Helper for creating `pand` instructions.
(decl pand (Reg RegMem) Reg)
(rule (pand src1 src2)
(xmm_rm_r $F32X4 (SseOpcode.Pand) src1 src2))
;; Helper for creating `andps` instructions.
(decl andps (Reg RegMem) Reg)
(rule (andps src1 src2)
(xmm_rm_r $F32X4 (SseOpcode.Andps) src1 src2))
;; Helper for creating `andpd` instructions.
(decl andpd (Reg RegMem) Reg)
(rule (andpd src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Andpd) src1 src2))
;; Helper for creating `por` instructions.
(decl por (Reg RegMem) Reg)
(rule (por src1 src2)
(xmm_rm_r $F32X4 (SseOpcode.Por) src1 src2))
;; Helper for creating `orps` instructions.
(decl orps (Reg RegMem) Reg)
(rule (orps src1 src2)
(xmm_rm_r $F32X4 (SseOpcode.Orps) src1 src2))
;; Helper for creating `orpd` instructions.
(decl orpd (Reg RegMem) Reg)
(rule (orpd src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Orpd) src1 src2))
;; Helper for creating `pxor` instructions.
(decl pxor (Reg RegMem) Reg)
(rule (pxor src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pxor) src1 src2))
;; Helper for creating `xorps` instructions.
(decl xorps (Reg RegMem) Reg)
(rule (xorps src1 src2)
(xmm_rm_r $F32X4 (SseOpcode.Xorps) src1 src2))
;; Helper for creating `xorpd` instructions.
(decl xorpd (Reg RegMem) Reg)
(rule (xorpd src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Xorpd) src1 src2))
;; Helper for creating `pmullw` instructions.
;;
;; In these multiply helpers the vector type passed to `xmm_rm_r` is the
;; result's lane type; it appears to be used only to type the temporary
;; destination register (always an XMM register) -- see `xmm_rm_r`.
(decl pmullw (Reg RegMem) Reg)
(rule (pmullw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Pmullw) src1 src2))
;; Helper for creating `pmulld` instructions.
;;
;; Fixed: `pmulld` multiplies i32x4 lanes, so pass `$I32X4` (was `$I16X8`,
;; inconsistent with sibling helpers such as `psubd`).
(decl pmulld (Reg RegMem) Reg)
(rule (pmulld src1 src2)
(xmm_rm_r $I32X4 (SseOpcode.Pmulld) src1 src2))
;; Helper for creating `pmulhw` instructions.
(decl pmulhw (Reg RegMem) Reg)
(rule (pmulhw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Pmulhw) src1 src2))
;; Helper for creating `pmulhuw` instructions.
(decl pmulhuw (Reg RegMem) Reg)
(rule (pmulhuw src1 src2)
(xmm_rm_r $I16X8 (SseOpcode.Pmulhuw) src1 src2))
;; Helper for creating `pmuldq` instructions.
;;
;; Fixed: `pmuldq` produces i64x2 results (signed multiply of the even i32
;; lanes per the Intel SDM), so pass `$I64X2` to match its unsigned twin
;; `pmuludq` below (was `$I16X8`).
(decl pmuldq (Reg RegMem) Reg)
(rule (pmuldq src1 src2)
(xmm_rm_r $I64X2 (SseOpcode.Pmuldq) src1 src2))
;; Helper for creating `pmuludq` instructions.
(decl pmuludq (Reg RegMem) Reg)
(rule (pmuludq src1 src2)
(xmm_rm_r $I64X2 (SseOpcode.Pmuludq) src1 src2))
;; Interleave the high-order 16-bit lanes of the two operands.
(decl punpckhwd (Reg RegMem) Reg)
(rule (punpckhwd a b) (xmm_rm_r $I16X8 (SseOpcode.Punpckhwd) a b))

;; Interleave the low-order 16-bit lanes of the two operands.
(decl punpcklwd (Reg RegMem) Reg)
(rule (punpcklwd a b) (xmm_rm_r $I16X8 (SseOpcode.Punpcklwd) a b))
;; Helper for creating `andnps` instructions.
;;
;; The SSE "and-not" family computes `(not first-operand) & second-operand`
;; (see the Intel SDM entries for ANDNPS/ANDNPD/PANDN).
(decl andnps (Reg RegMem) Reg)
(rule (andnps src1 src2)
(xmm_rm_r $F32X4 (SseOpcode.Andnps) src1 src2))
;; Helper for creating `andnpd` instructions.
(decl andnpd (Reg RegMem) Reg)
(rule (andnpd src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Andnpd) src1 src2))
;; Helper for creating `pandn` instructions.
;;
;; NOTE(review): `$F64X2` looks arbitrary for an integer op (compare `pxor`,
;; which passes `$I8X16`); the type appears only to pick the temporary
;; destination register's type -- confirm against `xmm_rm_r`.
(decl pandn (Reg RegMem) Reg)
(rule (pandn src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2))
;; Helper for creating `MInst.XmmRmRImm` instructions.
;;
;; Allocates a fresh temporary XMM register (typed `$I8X16`; only the
;; register class appears to matter), emits the instruction into it, and
;; returns the temporary as the result.
;;
;; NOTE(review): the emit code for `XmmRmRImm` debug-asserts `src1 ==
;; dst.to_reg()` (a tied first source); presumably register allocation
;; enforces that tie for the fresh temp here -- confirm.
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
(rule (xmm_rm_r_imm op src1 src2 imm size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.XmmRmRImm op
src1
src2
dst
imm
size))))
(writable_reg_to_reg dst)))
;; Construct a `palignr` instruction: concatenate the two operands and
;; extract a byte-aligned window selected by the immediate.
(decl palignr (Reg RegMem u8 OperandSize) Reg)
(rule (palignr a b shift sz)
      (xmm_rm_r_imm (SseOpcode.Palignr) a b shift sz))
;; Helper for creating `pshufd` instructions.
;;
;; Shuffles the 32-bit lanes of `src` according to the `imm` control byte
;; (see the Intel SDM entry for PSHUFD).
;;
;; Note the operand order: the freshly allocated destination register is
;; passed in the first-source slot of `MInst.XmmRmRImm` -- the emit code
;; debug-asserts that the first source equals `dst` -- while the real input
;; `src` goes in the reg/mem slot.
(decl pshufd (RegMem u8 OperandSize) Reg)
(rule (pshufd src imm size)
(let ((w_dst WritableReg (temp_writable_reg $I8X16))
(dst Reg (writable_reg_to_reg w_dst))
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pshufd)
dst
src
w_dst
imm
size))))
dst))
;; Emit an `MInst.XmmUnaryRmR` (unary op with a reg-or-mem source) into a
;; fresh temporary XMM register and return that register.
(decl xmm_unary_rm_r (SseOpcode RegMem) Reg)
(rule (xmm_unary_rm_r opcode source)
      (let ((tmp WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmUnaryRmR opcode source tmp))))
        (writable_reg_to_reg tmp)))

;; Sign-extend the low eight i8 lanes to i16x8.
(decl pmovsxbw (RegMem) Reg)
(rule (pmovsxbw source) (xmm_unary_rm_r (SseOpcode.Pmovsxbw) source))

;; Zero-extend the low eight u8 lanes to i16x8.
(decl pmovzxbw (RegMem) Reg)
(rule (pmovzxbw source) (xmm_unary_rm_r (SseOpcode.Pmovzxbw) source))
;; Emit an `MInst.XmmRmREvex` (EVEX-encoded AVX-512 op) into a fresh
;; temporary XMM register and return that register.
(decl xmm_rm_r_evex (Avx512Opcode RegMem Reg) Reg)
(rule (xmm_rm_r_evex opcode a b)
      (let ((tmp WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmRmREvex opcode a b tmp))))
        (writable_reg_to_reg tmp)))

;; Lane-wise i64x2 multiply via `vpmullq`.
;;
;; Requires AVX-512 vl and dq.
(decl vpmullq (RegMem Reg) Reg)
(rule (vpmullq a b) (xmm_rm_r_evex (Avx512Opcode.Vpmullq) a b))
;; Emit an `MInst.XmmRmiReg` (second operand may be a register, memory, or
;; immediate) into a fresh temporary XMM register and return that register.
(decl xmm_rmi_reg (SseOpcode Reg RegMemImm) Reg)
(rule (xmm_rmi_reg opcode a b)
      (let ((tmp WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmRmiReg opcode a b tmp))))
        (writable_reg_to_reg tmp)))

;; Shift each i64x2 lane left by the given amount.
(decl psllq (Reg RegMemImm) Reg)
(rule (psllq a b) (xmm_rmi_reg (SseOpcode.Psllq) a b))

;; Shift each i64x2 lane right (logical) by the given amount.
(decl psrlq (Reg RegMemImm) Reg)
(rule (psrlq a b) (xmm_rmi_reg (SseOpcode.Psrlq) a b))
;; Helper for creating `MInst.MulHi` instructions.
;;
;; Returns the (lo, hi) register halves of the multiplication.
;;
;; NOTE(review): the emit code for `MulHi` debug-asserts that the first
;; source is `%rax` and that the destinations are `%rax`/`%rdx` (the x86
;; widening-multiply register contract); the temporaries allocated here are
;; presumably pinned to those registers by regalloc constraints declared
;; elsewhere -- confirm.
(decl mul_hi (Type bool Reg RegMem) ValueRegs)
(rule (mul_hi ty signed src1 src2)
(let ((dst_lo WritableReg (temp_writable_reg ty))
(dst_hi WritableReg (temp_writable_reg ty))
(size OperandSize (operand_size_of_type ty))
(_ Unit (emit (MInst.MulHi size
signed
src1
src2
dst_lo
dst_hi))))
(value_regs (writable_reg_to_reg dst_lo)
(writable_reg_to_reg dst_hi))))
;; Helper for creating `mul` instructions that return both the lower and
;; (unsigned) higher halves of the result.
;;
;; Thin wrapper over `mul_hi` with `signed` fixed to `$false`.
(decl mulhi_u (Type Reg RegMem) ValueRegs)
(rule (mulhi_u ty src1 src2)
(mul_hi ty $false src1 src2))

View File

@@ -1,14 +1,13 @@
//! Instruction operand sub-components (aka "parts"): definitions and printing.
use super::regs::{self, show_ireg_sized};
use super::EmitState;
use super::{EmitState, RegMapper};
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::{MemFlags, Type};
use crate::isa::x64::inst::Inst;
use crate::machinst::*;
use regalloc::{
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
RegUsageMapper, Writable,
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, Writable,
};
use smallvec::{smallvec, SmallVec};
use std::fmt;
@@ -175,7 +174,7 @@ impl SyntheticAmode {
}
}
pub(crate) fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
pub(crate) fn map_uses<RM: RegMapper>(&mut self, map: &RM) {
match self {
SyntheticAmode::Real(addr) => addr.map_uses(map),
SyntheticAmode::NominalSPOffset { .. } => {
@@ -285,6 +284,25 @@ impl PrettyPrintSized for RegMemImm {
}
}
/// An operand which is either an 8-bit integer immediate or a register.
#[derive(Clone, Debug)]
pub enum Imm8Reg {
Imm8 { imm: u8 },
Reg { reg: Reg },
}
impl From<u8> for Imm8Reg {
fn from(imm: u8) -> Self {
Self::Imm8 { imm }
}
}
impl From<Reg> for Imm8Reg {
fn from(reg: Reg) -> Self {
Self::Reg { reg }
}
}
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32, 64, or 128 bit value.
#[derive(Clone, Debug)]

View File

@@ -147,14 +147,16 @@ pub(crate) fn emit(
Inst::AluRmiR {
size,
op,
src,
src1,
src2,
dst: reg_g,
} => {
debug_assert_eq!(*src1, reg_g.to_reg());
let mut rex = RexFlags::from(*size);
if *op == AluRmiROpcode::Mul {
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
// we have to special-case it.
match src {
match src2 {
RegMemImm::Reg { reg: reg_e } => {
emit_std_reg_reg(
sink,
@@ -213,7 +215,7 @@ pub(crate) fn emit(
};
assert!(!(is_8bit && *size == OperandSize::Size64));
match src {
match src2 {
RegMemImm::Reg { reg: reg_e } => {
if is_8bit {
rex.always_emit_if_8bit_needed(*reg_e);
@@ -323,8 +325,9 @@ pub(crate) fn emit(
}
}
Inst::Not { size, src } => {
let rex_flags = RexFlags::from((*size, src.to_reg()));
Inst::Not { size, src, dst } => {
debug_assert_eq!(*src, dst.to_reg());
let rex_flags = RexFlags::from((*size, dst.to_reg()));
let (opcode, prefix) = match size {
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
@@ -333,12 +336,13 @@ pub(crate) fn emit(
};
let subopcode = 2;
let enc_src = int_reg_enc(src.to_reg());
let enc_src = int_reg_enc(dst.to_reg());
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags)
}
Inst::Neg { size, src } => {
let rex_flags = RexFlags::from((*size, src.to_reg()));
Inst::Neg { size, src, dst } => {
debug_assert_eq!(*src, dst.to_reg());
let rex_flags = RexFlags::from((*size, dst.to_reg()));
let (opcode, prefix) = match size {
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
@@ -347,15 +351,21 @@ pub(crate) fn emit(
};
let subopcode = 3;
let enc_src = int_reg_enc(src.to_reg());
let enc_src = int_reg_enc(dst.to_reg());
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags)
}
Inst::Div {
size,
signed,
dividend,
divisor,
dst_quotient,
dst_remainder,
} => {
debug_assert_eq!(*dividend, regs::rax());
debug_assert_eq!(dst_quotient.to_reg(), regs::rax());
debug_assert_eq!(dst_remainder.to_reg(), regs::rdx());
let (opcode, prefix) = match size {
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
@@ -397,7 +407,18 @@ pub(crate) fn emit(
}
}
Inst::MulHi { size, signed, rhs } => {
Inst::MulHi {
size,
signed,
src1,
src2,
dst_lo,
dst_hi,
} => {
debug_assert_eq!(*src1, regs::rax());
debug_assert_eq!(dst_lo.to_reg(), regs::rax());
debug_assert_eq!(dst_hi.to_reg(), regs::rdx());
let rex_flags = RexFlags::from(*size);
let prefix = match size {
OperandSize::Size16 => LegacyPrefixes::_66,
@@ -407,7 +428,7 @@ pub(crate) fn emit(
};
let subopcode = if *signed { 5 } else { 4 };
match rhs {
match src2 {
RegMem::Reg { reg } => {
let src = int_reg_enc(*reg);
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
@@ -421,28 +442,39 @@ pub(crate) fn emit(
}
}
Inst::SignExtendData { size } => match size {
OperandSize::Size8 => {
sink.put1(0x66);
sink.put1(0x98);
Inst::SignExtendData { size, src, dst } => {
debug_assert_eq!(*src, regs::rax());
debug_assert_eq!(dst.to_reg(), regs::rdx());
match size {
OperandSize::Size8 => {
sink.put1(0x66);
sink.put1(0x98);
}
OperandSize::Size16 => {
sink.put1(0x66);
sink.put1(0x99);
}
OperandSize::Size32 => sink.put1(0x99),
OperandSize::Size64 => {
sink.put1(0x48);
sink.put1(0x99);
}
}
OperandSize::Size16 => {
sink.put1(0x66);
sink.put1(0x99);
}
OperandSize::Size32 => sink.put1(0x99),
OperandSize::Size64 => {
sink.put1(0x48);
sink.put1(0x99);
}
},
}
Inst::CheckedDivOrRemSeq {
kind,
size,
dividend,
divisor,
tmp,
dst_quotient,
dst_remainder,
} => {
debug_assert_eq!(*dividend, regs::rax());
debug_assert_eq!(dst_quotient.to_reg(), regs::rax());
debug_assert_eq!(dst_remainder.to_reg(), regs::rdx());
// Generates the following code sequence:
//
// ;; check divide by zero:
@@ -792,9 +824,11 @@ pub(crate) fn emit(
Inst::ShiftR {
size,
kind,
src,
num_bits,
dst,
} => {
debug_assert_eq!(*src, dst.to_reg());
let subopcode = match kind {
ShiftKind::RotateLeft => 0,
ShiftKind::RotateRight => 1,
@@ -805,7 +839,8 @@ pub(crate) fn emit(
let enc_dst = int_reg_enc(dst.to_reg());
let rex_flags = RexFlags::from((*size, dst.to_reg()));
match num_bits {
None => {
Imm8Reg::Reg { reg } => {
debug_assert_eq!(*reg, regs::rcx());
let (opcode, prefix) = match size {
OperandSize::Size8 => (0xD2, LegacyPrefixes::None),
OperandSize::Size16 => (0xD3, LegacyPrefixes::_66),
@@ -820,7 +855,7 @@ pub(crate) fn emit(
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
}
Some(num_bits) => {
Imm8Reg::Imm8 { imm: num_bits } => {
let (opcode, prefix) = match size {
OperandSize::Size8 => (0xC0, LegacyPrefixes::None),
OperandSize::Size16 => (0xC1, LegacyPrefixes::_66),
@@ -840,10 +875,16 @@ pub(crate) fn emit(
}
}
Inst::XmmRmiReg { opcode, src, dst } => {
Inst::XmmRmiReg {
opcode,
src1,
src2,
dst,
} => {
debug_assert_eq!(*src1, dst.to_reg());
let rex = RexFlags::clear_w();
let prefix = LegacyPrefixes::_66;
if let RegMemImm::Imm { simm32 } = src {
if let RegMemImm::Imm { simm32 } = src2 {
let (opcode_bytes, reg_digit) = match opcode {
SseOpcode::Psllw => (0x0F71, 6),
SseOpcode::Pslld => (0x0F72, 6),
@@ -874,7 +915,7 @@ pub(crate) fn emit(
_ => panic!("invalid opcode: {}", opcode),
};
match src {
match src2 {
RegMemImm::Reg { reg } => {
emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex);
}
@@ -993,9 +1034,11 @@ pub(crate) fn emit(
Inst::Cmove {
size,
cc,
src,
consequent,
alternative,
dst: reg_g,
} => {
debug_assert_eq!(*alternative, reg_g.to_reg());
let rex_flags = RexFlags::from(*size);
let prefix = match size {
OperandSize::Size16 => LegacyPrefixes::_66,
@@ -1004,7 +1047,7 @@ pub(crate) fn emit(
_ => unreachable!("invalid size spec for cmove"),
};
let opcode = 0x0F40 + cc.get_enc() as u32;
match src {
match consequent {
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
}
@@ -1433,9 +1476,11 @@ pub(crate) fn emit(
Inst::XmmRmR {
op,
src: src_e,
src1,
src2: src_e,
dst: reg_g,
} => {
debug_assert_eq!(*src1, reg_g.to_reg());
let rex = RexFlags::clear_w();
let (prefix, opcode, length) = match op {
SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
@@ -1678,11 +1723,13 @@ pub(crate) fn emit(
Inst::XmmRmRImm {
op,
src,
src1,
src2,
dst,
imm,
size,
} => {
debug_assert_eq!(*src1, dst.to_reg());
let (prefix, opcode, len) = match op {
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
@@ -1713,7 +1760,7 @@ pub(crate) fn emit(
// `src` in ModRM's r/m field.
_ => false,
};
match src {
match src2 {
RegMem::Reg { reg } => {
if regs_swapped {
emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex);
@@ -2403,8 +2450,17 @@ pub(crate) fn emit(
}
}
Inst::LockCmpxchg { ty, src, dst } => {
// lock cmpxchg{b,w,l,q} %src, (dst)
Inst::LockCmpxchg {
ty,
replacement,
expected,
mem,
dst_old,
} => {
debug_assert_eq!(*expected, regs::rax());
debug_assert_eq!(dst_old.to_reg(), regs::rax());
// lock cmpxchg{b,w,l,q} %replacement, (mem)
// Note that 0xF0 is the Lock prefix.
let (prefix, opcodes) = match *ty {
types::I8 => (LegacyPrefixes::_F0, 0x0FB0),
@@ -2413,12 +2469,34 @@ pub(crate) fn emit(
types::I64 => (LegacyPrefixes::_F0, 0x0FB1),
_ => unreachable!(),
};
let rex = RexFlags::from((OperandSize::from_ty(*ty), *src));
let amode = dst.finalize(state, sink);
emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex);
let rex = RexFlags::from((OperandSize::from_ty(*ty), *replacement));
let amode = mem.finalize(state, sink);
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcodes,
2,
*replacement,
&amode,
rex,
);
}
Inst::AtomicRmwSeq { ty, op } => {
Inst::AtomicRmwSeq {
ty,
op,
address,
operand,
temp,
dst_old,
} => {
debug_assert_eq!(*address, regs::r9());
debug_assert_eq!(*operand, regs::r10());
debug_assert_eq!(temp.to_reg(), regs::r11());
debug_assert_eq!(dst_old.to_reg(), regs::rax());
// Emit this:
//
// mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
@@ -2516,8 +2594,10 @@ pub(crate) fn emit(
// No need to call `add_trap` here, since the `i4` emit will do that.
let i4 = Inst::LockCmpxchg {
ty: *ty,
src: r11,
dst: amode.into(),
replacement: r11,
expected: regs::rax(),
mem: amode.into(),
dst_old: Writable::from_reg(regs::rax()),
};
i4.emit(sink, info, state);

View File

@@ -4199,8 +4199,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I8,
src: rbx,
dst: am1,
mem: am1,
replacement: rbx,
expected: rax,
dst_old: w_rax,
},
"F0410FB09C9241010000",
"lock cmpxchgb %bl, 321(%r10,%rdx,4)",
@@ -4209,8 +4211,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I8,
src: rdx,
dst: am2.clone(),
mem: am2.clone(),
replacement: rdx,
expected: rax,
dst_old: w_rax,
},
"F00FB094F1C7CFFFFF",
"lock cmpxchgb %dl, -12345(%rcx,%rsi,8)",
@@ -4218,8 +4222,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I8,
src: rsi,
dst: am2.clone(),
mem: am2.clone(),
replacement: rsi,
expected: rax,
dst_old: w_rax,
},
"F0400FB0B4F1C7CFFFFF",
"lock cmpxchgb %sil, -12345(%rcx,%rsi,8)",
@@ -4227,8 +4233,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I8,
src: r10,
dst: am2.clone(),
mem: am2.clone(),
replacement: r10,
expected: rax,
dst_old: w_rax,
},
"F0440FB094F1C7CFFFFF",
"lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)",
@@ -4236,8 +4244,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I8,
src: r15,
dst: am2.clone(),
mem: am2.clone(),
replacement: r15,
expected: rax,
dst_old: w_rax,
},
"F0440FB0BCF1C7CFFFFF",
"lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)",
@@ -4246,8 +4256,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I16,
src: rsi,
dst: am2.clone(),
mem: am2.clone(),
replacement: rsi,
expected: rax,
dst_old: w_rax,
},
"66F00FB1B4F1C7CFFFFF",
"lock cmpxchgw %si, -12345(%rcx,%rsi,8)",
@@ -4255,8 +4267,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I16,
src: r10,
dst: am2.clone(),
mem: am2.clone(),
replacement: r10,
expected: rax,
dst_old: w_rax,
},
"66F0440FB194F1C7CFFFFF",
"lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)",
@@ -4265,8 +4279,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I32,
src: rsi,
dst: am2.clone(),
mem: am2.clone(),
replacement: rsi,
expected: rax,
dst_old: w_rax,
},
"F00FB1B4F1C7CFFFFF",
"lock cmpxchgl %esi, -12345(%rcx,%rsi,8)",
@@ -4274,8 +4290,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I32,
src: r10,
dst: am2.clone(),
mem: am2.clone(),
replacement: r10,
expected: rax,
dst_old: w_rax,
},
"F0440FB194F1C7CFFFFF",
"lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)",
@@ -4284,8 +4302,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I64,
src: rsi,
dst: am2.clone(),
mem: am2.clone(),
replacement: rsi,
expected: rax,
dst_old: w_rax,
},
"F0480FB1B4F1C7CFFFFF",
"lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)",
@@ -4293,8 +4313,10 @@ fn test_x64_emit() {
insns.push((
Inst::LockCmpxchg {
ty: types::I64,
src: r10,
dst: am2.clone(),
mem: am2.clone(),
replacement: r10,
expected: rax,
dst_old: w_rax,
},
"F04C0FB194F1C7CFFFFF",
"lock cmpxchgq %r10, -12345(%rcx,%rsi,8)",
@@ -4302,27 +4324,62 @@ fn test_x64_emit() {
// AtomicRmwSeq
insns.push((
Inst::AtomicRmwSeq { ty: types::I8, op: inst_common::AtomicRmwOp::Or, },
Inst::AtomicRmwSeq {
ty: types::I8,
op: inst_common::AtomicRmwOp::Or,
address: r9,
operand: r10,
temp: w_r11,
dst_old: w_rax
},
"490FB6014989C34D09D3F0450FB0190F85EFFFFFFF",
"atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
));
insns.push((
Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, },
Inst::AtomicRmwSeq {
ty: types::I16,
op: inst_common::AtomicRmwOp::And,
address: r9,
operand: r10,
temp: w_r11,
dst_old: w_rax
},
"490FB7014989C34D21D366F0450FB1190F85EEFFFFFF",
"atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
));
insns.push((
Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, },
Inst::AtomicRmwSeq {
ty: types::I32,
op: inst_common::AtomicRmwOp::Xchg,
address: r9,
operand: r10,
temp: w_r11,
dst_old: w_rax
},
"418B014989C34D89D3F0450FB1190F85EFFFFFFF",
"atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
));
insns.push((
Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Umin, },
Inst::AtomicRmwSeq {
ty: types::I32,
op: inst_common::AtomicRmwOp::Umin,
address: r9,
operand: r10,
temp: w_r11,
dst_old: w_rax
},
"418B014989C34539DA4D0F46DAF0450FB1190F85EBFFFFFF",
"atomically { 32_bits_at_[%r9]) Umin= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
));
insns.push((
Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, },
Inst::AtomicRmwSeq {
ty: types::I64,
op: inst_common::AtomicRmwOp::Add,
address: r9,
operand: r10,
temp: w_r11,
dst_old: w_rax
},
"498B014989C34D01D3F04D0FB1190F85EFFFFFFF",
"atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
));

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,947 @@
;; x86-64 instruction selection and CLIF-to-MachInst lowering.
;; The main lowering constructor term: takes a clif `Inst` and returns the
;; register(s) within which the lowered instruction's result values live.
(decl lower (Inst) ValueRegs)
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
;; Materialize the constant directly into a register of the result type.
(rule (lower (has_type (fits_in_64 ty)
(iconst (u64_from_imm64 x))))
(value_reg (imm ty x)))
;; `i128`
;; An `iconst` payload is at most 64 bits, so the high half is always zero.
(rule (lower (has_type $I128
(iconst (u64_from_imm64 x))))
(value_regs (imm $I64 x)
(imm $I64 0)))
;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `b64` and smaller.
;; Booleans are materialized as the integers 0 (false) and 1 (true).
(rule (lower (has_type (fits_in_64 ty)
(bconst $false)))
(value_reg (imm ty 0)))
(rule (lower (has_type (fits_in_64 ty)
(bconst $true)))
(value_reg (imm ty 1)))
;; `b128`
;; The 0/1 value lives in the low half; the high half is always zero.
(rule (lower (has_type $B128
(bconst $false)))
(value_regs (imm $B64 0)
(imm $B64 0)))
(rule (lower (has_type $B128
(bconst $true)))
(value_regs (imm $B64 1)
(imm $B64 0)))
;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; A null reference is the all-zeros bit pattern of its type.
(rule (lower (has_type ty (null)))
(value_reg (imm ty 0)))
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
;; Add two registers.
(rule (lower (has_type (fits_in_64 ty)
(iadd x y)))
(value_reg (add ty
(put_in_reg x)
(RegMemImm.Reg (put_in_reg y)))))
;; Add a register and an immediate.
;; Addition is commutative, so the immediate is matched on either side.
(rule (lower (has_type (fits_in_64 ty)
(iadd x (simm32_from_value y))))
(value_reg (add ty (put_in_reg x) y)))
(rule (lower (has_type (fits_in_64 ty)
(iadd (simm32_from_value x) y)))
(value_reg (add ty (put_in_reg y) x)))
;; Add a register and memory.
;; Likewise commutative: a sinkable load on either side is folded into the
;; `add`'s memory operand.
(rule (lower (has_type (fits_in_64 ty)
(iadd x (sinkable_load y))))
(value_reg (add ty
(put_in_reg x)
(sink_load y))))
(rule (lower (has_type (fits_in_64 ty)
(iadd (sinkable_load x) y)))
(value_reg (add ty
(put_in_reg y)
(sink_load x))))
;; SSE.
;; The lane configuration (bits x lanes) selects the `padd*` variant.
(rule (lower (has_type (multi_lane 8 16)
(iadd x y)))
(value_reg (paddb (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(iadd x y)))
(value_reg (paddw (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 32 4)
(iadd x y)))
(value_reg (paddd (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 64 2)
(iadd x y)))
(value_reg (paddq (put_in_reg x)
(put_in_reg_mem y))))
;; `i128`
(rule (lower (has_type $I128 (iadd x y)))
      ;; Split each operand into its 64-bit halves, then add the low halves
      ;; and fold the carry into the high halves via `adc`.
      (let ((lhs ValueRegs (put_in_regs x))
            (lhs_lo Reg (value_regs_get lhs 0))
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs ValueRegs (put_in_regs y))
            (rhs_lo Reg (value_regs_get rhs 0))
            (rhs_hi Reg (value_regs_get rhs 1)))
        (with_flags (add_with_flags $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    (adc $I64 lhs_hi (RegMemImm.Reg rhs_hi)))))
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Signed saturating vector add; the lane size selects the SSE opcode.
(rule (lower (has_type (multi_lane 8 16)
(sadd_sat x y)))
(value_reg (paddsb (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(sadd_sat x y)))
(value_reg (paddsw (put_in_reg x)
(put_in_reg_mem y))))
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned saturating vector add.
(rule (lower (has_type (multi_lane 8 16)
(uadd_sat x y)))
(value_reg (paddusb (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(uadd_sat x y)))
(value_reg (paddusw (put_in_reg x)
(put_in_reg_mem y))))
;;;; Rules for `iadd_ifcout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; These intentionally mirror the plain `iadd` rules above; presumably the
;; machine `add` already produces the carry flag that the `ifcout` result
;; represents, so no extra instruction is needed -- confirm against the
;; flags-consumer lowering.
;; Add two registers.
(rule (lower (has_type (fits_in_64 ty)
(iadd_ifcout x y)))
(value_reg (add ty
(put_in_reg x)
(RegMemImm.Reg (put_in_reg y)))))
;; Add a register and an immediate.
(rule (lower (has_type (fits_in_64 ty)
(iadd_ifcout x (simm32_from_value y))))
(value_reg (add ty (put_in_reg x) y)))
(rule (lower (has_type (fits_in_64 ty)
(iadd_ifcout (simm32_from_value x) y)))
(value_reg (add ty (put_in_reg y) x)))
;; Add a register and memory.
(rule (lower (has_type (fits_in_64 ty)
(iadd_ifcout x (sinkable_load y))))
(value_reg (add ty
(put_in_reg x)
(sink_load y))))
(rule (lower (has_type (fits_in_64 ty)
(iadd_ifcout (sinkable_load x) y)))
(value_reg (add ty
(put_in_reg y)
(sink_load x))))
;; (No `iadd_ifcout` for `i128`.)
;;;; Rules for `iadd_imm` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
;; When the immediate fits in a `RegMemImm.Imm`, use that.
(rule (lower (has_type (fits_in_64 ty) (iadd_imm (simm32_from_imm64 x) y)))
(value_reg (add ty (put_in_reg y) x)))
;; Otherwise, put the immediate into a register.
;; NOTE(review): this pattern also matches when `simm32_from_imm64` would;
;; presumably the more specific rule above takes priority -- confirm ISLE's
;; overlap/priority semantics.
(rule (lower (has_type (fits_in_64 ty) (iadd_imm (u64_from_imm64 x) y)))
(value_reg (add ty (put_in_reg y) (RegMemImm.Reg (imm ty x)))))
;; `i128`: add the immediate to the low half and propagate the carry into
;; the high half with `adc`.
;; Immediate fits in 32 bits: use it directly as a `RegMemImm.Imm`.
(rule (lower (has_type $I128 (iadd_imm (simm32_from_imm64 k) val)))
      (let ((regs ValueRegs (put_in_regs val))
            (lo Reg (value_regs_get regs 0))
            (hi Reg (value_regs_get regs 1)))
        (with_flags (add_with_flags $I64 lo k)
                    (adc $I64 hi (RegMemImm.Imm 0)))))
;; Otherwise, materialize the immediate into a register first.
(rule (lower (has_type $I128 (iadd_imm (u64_from_imm64 k) val)))
      (let ((regs ValueRegs (put_in_regs val))
            (lo Reg (value_regs_get regs 0))
            (hi Reg (value_regs_get regs 1))
            (k_reg Reg (imm $I64 k)))
        (with_flags (add_with_flags $I64 lo (RegMemImm.Reg k_reg))
                    (adc $I64 hi (RegMemImm.Imm 0)))))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
;; Unlike `iadd`, there are no operand-swapped variants below: subtraction
;; is not commutative, so only the right-hand side may be an immediate or a
;; sunk load.
;; Sub two registers.
(rule (lower (has_type (fits_in_64 ty)
(isub x y)))
(value_reg (sub ty
(put_in_reg x)
(RegMemImm.Reg (put_in_reg y)))))
;; Sub a register and an immediate.
(rule (lower (has_type (fits_in_64 ty)
(isub x (simm32_from_value y))))
(value_reg (sub ty (put_in_reg x) y)))
;; Sub a register and memory.
(rule (lower (has_type (fits_in_64 ty)
(isub x (sinkable_load y))))
(value_reg (sub ty
(put_in_reg x)
(sink_load y))))
;; SSE.
;; The lane configuration (bits x lanes) selects the `psub*` variant.
(rule (lower (has_type (multi_lane 8 16)
(isub x y)))
(value_reg (psubb (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(isub x y)))
(value_reg (psubw (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 32 4)
(isub x y)))
(value_reg (psubd (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 64 2)
(isub x y)))
(value_reg (psubq (put_in_reg x)
(put_in_reg_mem y))))
;; `i128`
(rule (lower (has_type $I128 (isub x y)))
      ;; Split both operands into 64-bit halves, subtract the low halves,
      ;; then subtract the high halves with borrow (`sbb`).
      (let ((lhs ValueRegs (put_in_regs x))
            (lhs_lo Reg (value_regs_get lhs 0))
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs ValueRegs (put_in_regs y))
            (rhs_lo Reg (value_regs_get rhs 0))
            (rhs_hi Reg (value_regs_get rhs 1)))
        (with_flags (sub_with_flags $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    (sbb $I64 lhs_hi (RegMemImm.Reg rhs_hi)))))
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Signed saturating vector subtract; the lane size selects the SSE opcode.
(rule (lower (has_type (multi_lane 8 16)
(ssub_sat x y)))
(value_reg (psubsb (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(ssub_sat x y)))
(value_reg (psubsw (put_in_reg x)
(put_in_reg_mem y))))
;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned saturating vector subtract.
(rule (lower (has_type (multi_lane 8 16)
(usub_sat x y)))
(value_reg (psubusb (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(usub_sat x y)))
(value_reg (psubusw (put_in_reg x)
(put_in_reg_mem y))))
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `{i,b}64` and smaller.
;; (`m_and` is presumably named to avoid a clash with a reserved/builtin
;; `and` -- confirm against its declaration.)
;; And two registers.
(rule (lower (has_type (fits_in_64 ty) (band x y)))
(value_reg (m_and ty
(put_in_reg x)
(RegMemImm.Reg (put_in_reg y)))))
;; And with a memory operand.
;; Commutative, so a sinkable load on either side is folded in.
(rule (lower (has_type (fits_in_64 ty)
(band x (sinkable_load y))))
(value_reg (m_and ty
(put_in_reg x)
(sink_load y))))
(rule (lower (has_type (fits_in_64 ty)
(band (sinkable_load x) y)))
(value_reg (m_and ty
(put_in_reg y)
(sink_load x))))
;; And with an immediate.
(rule (lower (has_type (fits_in_64 ty)
(band x (simm32_from_value y))))
(value_reg (m_and ty
(put_in_reg x)
y)))
(rule (lower (has_type (fits_in_64 ty)
(band (simm32_from_value x) y)))
(value_reg (m_and ty
(put_in_reg y)
x)))
;; SSE.
;; Float vectors get the float-domain forms; all other vector types fall
;; through to the integer `pand`.
(rule (lower (has_type $F32X4 (band x y)))
(value_reg (andps (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type $F64X2 (band x y)))
(value_reg (andpd (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane _bits _lanes)
(band x y)))
(value_reg (pand (put_in_reg x)
(put_in_reg_mem y))))
;; `{i,b}128`.
(rule (lower (has_type $I128 (band x y)))
      ;; `and` the low halves and the high halves independently.
      (let ((lhs ValueRegs (put_in_regs x))
            (lhs_lo Reg (value_regs_get lhs 0))
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs ValueRegs (put_in_regs y))
            (rhs_lo Reg (value_regs_get rhs 0))
            (rhs_hi Reg (value_regs_get rhs 1)))
        (value_regs (m_and $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    (m_and $I64 lhs_hi (RegMemImm.Reg rhs_hi)))))
(rule (lower (has_type $B128 (band x y)))
      ;; A `b128` is canonically `0` or `1`, so only the low halves need the
      ;; actual `and`; rather than materializing a fresh zero for the high
      ;; half, reuse `x`'s high half, which is already zero.
      (let ((lhs ValueRegs (put_in_regs x))
            (lhs_lo Reg (value_regs_get lhs 0))
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs_lo Reg (lo_reg y)))
        (value_regs (m_and $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    lhs_hi)))
;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `{i,b}64` and smaller.
;; Or two registers.
(rule (lower (has_type (fits_in_64 ty) (bor x y)))
(value_reg (or ty
(put_in_reg x)
(RegMemImm.Reg (put_in_reg y)))))
;; Or with a memory operand.
;; Commutative, so a sinkable load on either side is folded in.
(rule (lower (has_type (fits_in_64 ty)
(bor x (sinkable_load y))))
(value_reg (or ty
(put_in_reg x)
(sink_load y))))
(rule (lower (has_type (fits_in_64 ty)
(bor (sinkable_load x) y)))
(value_reg (or ty
(put_in_reg y)
(sink_load x))))
;; Or with an immediate.
(rule (lower (has_type (fits_in_64 ty)
(bor x (simm32_from_value y))))
(value_reg (or ty
(put_in_reg x)
y)))
(rule (lower (has_type (fits_in_64 ty)
(bor (simm32_from_value x) y)))
(value_reg (or ty
(put_in_reg y)
x)))
;; SSE.
;; Float vectors get the float-domain forms; other vector types use `por`.
(rule (lower (has_type $F32X4 (bor x y)))
(value_reg (orps (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type $F64X2 (bor x y)))
(value_reg (orpd (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane _bits _lanes)
(bor x y)))
(value_reg (por (put_in_reg x)
(put_in_reg_mem y))))
;; `{i,b}128`.
;; `or` two 128-bit values half-by-half.
(decl or_i128 (ValueRegs ValueRegs) ValueRegs)
(rule (or_i128 a b)
      (let ((a_lo Reg (value_regs_get a 0))
            (a_hi Reg (value_regs_get a 1))
            (b_lo Reg (value_regs_get b 0))
            (b_hi Reg (value_regs_get b 1)))
        (value_regs (or $I64 a_lo (RegMemImm.Reg b_lo))
                    (or $I64 a_hi (RegMemImm.Reg b_hi)))))
(rule (lower (has_type $I128 (bor x y)))
      (or_i128 (put_in_regs x) (put_in_regs y)))
(rule (lower (has_type $B128 (bor x y)))
      ;; A `b128` is canonically `0` or `1`, so only the low halves need the
      ;; `or`; reuse `x`'s already-zero high half instead of creating a new
      ;; zero.
      (let ((lhs ValueRegs (put_in_regs x))
            (lhs_lo Reg (value_regs_get lhs 0))
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs_lo Reg (lo_reg y)))
        (value_regs (or $I64 lhs_lo (RegMemImm.Reg rhs_lo))
                    lhs_hi)))
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `{i,b}64` and smaller.
;; Xor two registers.
(rule (lower (has_type (fits_in_64 ty) (bxor x y)))
(value_reg (xor ty
(put_in_reg x)
(RegMemImm.Reg (put_in_reg y)))))
;; Xor with a memory operand.
;; Commutative, so a sinkable load on either side is folded in.
(rule (lower (has_type (fits_in_64 ty)
(bxor x (sinkable_load y))))
(value_reg (xor ty
(put_in_reg x)
(sink_load y))))
(rule (lower (has_type (fits_in_64 ty)
(bxor (sinkable_load x) y)))
(value_reg (xor ty
(put_in_reg y)
(sink_load x))))
;; Xor with an immediate.
(rule (lower (has_type (fits_in_64 ty)
(bxor x (simm32_from_value y))))
(value_reg (xor ty
(put_in_reg x)
y)))
(rule (lower (has_type (fits_in_64 ty)
(bxor (simm32_from_value x) y)))
(value_reg (xor ty
(put_in_reg y)
x)))
;; SSE.
;; Float vectors get the float-domain forms; other vector types use `pxor`.
(rule (lower (has_type $F32X4 (bxor x y)))
(value_reg (xorps (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type $F64X2 (bxor x y)))
(value_reg (xorpd (put_in_reg x)
(put_in_reg_mem y))))
(rule (lower (has_type (multi_lane _bits _lanes)
(bxor x y)))
(value_reg (pxor (put_in_reg x)
(put_in_reg_mem y))))
;; `{i,b}128`.
(rule (lower (has_type $I128 (bxor x y)))
(let ((x_regs ValueRegs (put_in_regs x))
(x_lo Reg (value_regs_get x_regs 0))
(x_hi Reg (value_regs_get x_regs 1))
(y_regs ValueRegs (put_in_regs y))
(y_lo Reg (value_regs_get y_regs 0))
(y_hi Reg (value_regs_get y_regs 1)))
(value_regs (xor $I64 x_lo (RegMemImm.Reg y_lo))
(xor $I64 x_hi (RegMemImm.Reg y_hi)))))
(rule (lower (has_type $B128 (bxor x y)))
;; Booleans are always `0` or `1`, so we only need to do the `xor` on the
;; low half. The high half is always zero but, rather than generate a new
;; zero, we just reuse `x`'s high half which is already zero.
(let ((x_regs ValueRegs (put_in_regs x))
(x_lo Reg (value_regs_get x_regs 0))
(x_hi Reg (value_regs_get x_regs 1))
(y_lo Reg (lo_reg y)))
(value_regs (xor $I64 x_lo (RegMemImm.Reg y_lo))
x_hi)))
;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (ishl src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the shift
;; amount to the value's bit width.
(let ((amt_ Reg (lo_reg amt)))
(value_reg (shl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
;; Shift by a constant amount: encode it directly as an 8-bit immediate.
(rule (lower (has_type (fits_in_64 ty) (ishl src (imm8_from_value amt))))
(value_reg (shl ty (put_in_reg src) amt)))
;; `i128`.
;; Helper: shift a 128-bit value (held as a lo/hi register pair) left by
;; `shift` bits, with the amount taken modulo 128.
(decl shl_i128 (ValueRegs Reg) ValueRegs)
(rule (shl_i128 val shift)
      ;; Unpack the two 64-bit halves of the value being shifted.
      (let ((val_lo Reg (value_regs_get val 0))
            (val_hi Reg (value_regs_get val 1))
            ;; Shift each half by `shift` (the hardware masks the count).
            (shifted_lo Reg (shl $I64 val_lo (Imm8Reg.Reg shift)))
            (shifted_hi Reg (shl $I64 val_hi (Imm8Reg.Reg shift)))
            ;; `val_lo >> (64 - shift)` are the bits that carry over from the
            ;; low half into the high half.
            (carry Reg (shr $I64 val_lo (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg shift)))))
            (zero Reg (imm $I64 0))
            ;; Nullify the carry when shifting by a multiple of 128.
            (masked_carry Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) shift)
                                            (cmove $I64 (CC.Z) (RegMem.Reg zero) carry)))
            ;; Fold the carried bits into the shifted high half.
            (merged_hi Reg (or $I64 masked_carry (RegMemImm.Reg shifted_hi))))
        ;; Combine the two halves. However, when shifting by >= 64 (modulo
        ;; 128), the low half is zero and the high half takes what was
        ;; computed for the low half.
        (with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) shift)
                      (cmove $I64 (CC.Z) (RegMem.Reg shifted_lo) zero)
                      (cmove $I64 (CC.Z) (RegMem.Reg merged_hi) shifted_lo))))
;; 128-bit shift left: delegate to the `shl_i128` helper.
(rule (lower (has_type $I128 (ishl src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the shift
;; amount to the value's bit width.
(let ((amt_ Reg (lo_reg amt)))
(shl_i128 (put_in_regs src) amt_)))
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (ushr src amt)))
;; Zero-extend `src` to a full register first so that, for types narrower
;; than the machine word, the bits shifted in from above are zero.
(let ((src_ Reg (extend_to_reg src ty (ExtendKind.Zero)))
;; NB: Only the low bits of `amt` matter since we logically mask the
;; shift amount to the value's bit width.
(amt_ Reg (lo_reg amt)))
(value_reg (shr ty src_ (Imm8Reg.Reg amt_)))))
;; Shift by a constant amount: encode it directly as an 8-bit immediate.
(rule (lower (has_type (fits_in_64 ty) (ushr src (imm8_from_value amt))))
(let ((src_ Reg (extend_to_reg src ty (ExtendKind.Zero))))
(value_reg (shr ty src_ amt))))
;; `i128`.
;; Helper: logically shift a 128-bit value (held as a lo/hi register pair)
;; right by `shift` bits, with the amount taken modulo 128.
(decl shr_i128 (ValueRegs Reg) ValueRegs)
(rule (shr_i128 val shift)
      ;; Unpack the two 64-bit halves of the value being shifted.
      (let ((val_lo Reg (value_regs_get val 0))
            (val_hi Reg (value_regs_get val 1))
            ;; Shift each half by `shift` (the hardware masks the count).
            (shifted_lo Reg (shr $I64 val_lo (Imm8Reg.Reg shift)))
            (shifted_hi Reg (shr $I64 val_hi (Imm8Reg.Reg shift)))
            ;; `val_hi << (64 - shift)` are the bits that carry over from the
            ;; high half into the low half.
            (carry Reg (shl $I64 val_hi (Imm8Reg.Reg (sub $I64 (imm $I64 64) (RegMemImm.Reg shift)))))
            ;; Nullify the carry when shifting by a multiple of 128.
            (masked_carry Reg (with_flags_1 (test (OperandSize.Size64) (RegMemImm.Imm 127) shift)
                                            (cmove $I64 (CC.Z) (RegMem.Reg (imm $I64 0)) carry)))
            ;; Fold the carried bits into the shifted low half.
            (merged_lo Reg (or $I64 masked_carry (RegMemImm.Reg shifted_lo))))
        ;; Combine the two halves. However, when shifting by >= 64 (modulo
        ;; 128), the high half is zero and the low half takes what was
        ;; computed for the high half.
        (with_flags_2 (test (OperandSize.Size64) (RegMemImm.Imm 64) shift)
                      (cmove $I64 (CC.Z) (RegMem.Reg merged_lo) shifted_hi)
                      (cmove $I64 (CC.Z) (RegMem.Reg shifted_hi) (imm $I64 0)))))
;; 128-bit logical shift right: delegate to the `shr_i128` helper.
(rule (lower (has_type $I128 (ushr src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the shift
;; amount to the value's bit width.
(let ((amt_ Reg (lo_reg amt)))
(shr_i128 (put_in_regs src) amt_)))
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller: a single rotate instruction.
(rule (lower (has_type (fits_in_64 ty) (rotl src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the
;; shift amount to the value's bit width.
(let ((amt_ Reg (lo_reg amt)))
(value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
;; Rotate by a constant amount: encode it directly as an 8-bit immediate.
(rule (lower (has_type (fits_in_64 ty) (rotl src (imm8_from_value amt))))
(value_reg (m_rotl ty (put_in_reg src) amt)))
;; `i128`.
;; No 128-bit rotate instruction exists, so compose it from the 128-bit
;; shift helpers: rotl(x, n) == (x << n) | (x >> (128 - n)), with both
;; shift amounts reduced modulo 128 inside the helpers.
(rule (lower (has_type $I128 (rotl src amt)))
(let ((src_ ValueRegs (put_in_regs src))
;; NB: Only the low bits of `amt` matter since we logically mask the
;; rotation amount to the value's bit width.
(amt_ Reg (lo_reg amt)))
(or_i128 (shl_i128 src_ amt_)
(shr_i128 src_ (sub $I64 (imm $I64 128) (RegMemImm.Reg amt_))))))
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `pavgb`/`pavgw` compute the per-lane unsigned rounding average,
;; (x + y + 1) >> 1, which matches `avg_round` semantics directly.
(rule (lower (has_type (multi_lane 8 16)
(avg_round x y)))
(value_reg (pavgb (put_in_reg x) (put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(avg_round x y)))
(value_reg (pavgw (put_in_reg x) (put_in_reg_mem y))))
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
;; Multiply two registers.
(rule (lower (has_type (fits_in_64 ty) (imul x y)))
(value_reg (mul ty
(put_in_reg x)
(RegMemImm.Reg (put_in_reg y)))))
;; Multiply a register and an immediate.
;; (Multiplication is commutative, so the immediate is accepted in either
;; operand position.)
(rule (lower (has_type (fits_in_64 ty)
(imul x (simm32_from_value y))))
(value_reg (mul ty (put_in_reg x) y)))
(rule (lower (has_type (fits_in_64 ty)
(imul (simm32_from_value x) y)))
(value_reg (mul ty (put_in_reg y) x)))
;; Multiply a register and a memory load.
;; (Again on either side; the load is sunk into the multiply's memory
;; operand.)
(rule (lower (has_type (fits_in_64 ty)
(imul x (sinkable_load y))))
(value_reg (mul ty
(put_in_reg x)
(sink_load y))))
(rule (lower (has_type (fits_in_64 ty)
(imul (sinkable_load x) y)))
(value_reg (mul ty
(put_in_reg y)
(sink_load x))))
;; `i128`.
;; 128-bit multiply, built from 64-bit pieces:
;;
;;     dst_lo = lhs_lo * rhs_lo
;;     dst_hi = umulhi(lhs_lo, rhs_lo) +
;;              lhs_lo * rhs_hi +
;;              lhs_hi * rhs_lo
;;
;; (The `lhs_hi * rhs_hi` term only contributes to bits >= 128 and is
;; therefore dropped.)
(rule (lower (has_type $I128 (imul x y)))
      (let ((lhs ValueRegs (put_in_regs x))
            (lhs_lo Reg (value_regs_get lhs 0))
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs ValueRegs (put_in_regs y))
            (rhs_lo Reg (value_regs_get rhs 0))
            (rhs_hi Reg (value_regs_get rhs 1))
            ;; The two cross products that land in the high half.
            (lo_hi Reg (mul $I64 lhs_lo (RegMemImm.Reg rhs_hi)))
            (hi_lo Reg (mul $I64 lhs_hi (RegMemImm.Reg rhs_lo)))
            (cross Reg (add $I64 lo_hi (RegMemImm.Reg hi_lo)))
            ;; Widening multiply of the low halves: the low 64 bits are the
            ;; result's low half; the upper 64 bits feed the high half.
            (widened ValueRegs (mulhi_u $I64 lhs_lo (RegMem.Reg rhs_lo)))
            (dst_lo Reg (value_regs_get widened 0))
            (overflow Reg (value_regs_get widened 1))
            ;; High half = cross products + overflow from the low multiply.
            (dst_hi Reg (add $I64 cross (RegMemImm.Reg overflow))))
        (value_regs dst_lo dst_hi)))
;; SSE.
;; (No i8x16 multiply.)
;; `i16x8` and `i32x4` lane-wise multiplies map to single instructions.
(rule (lower (has_type (multi_lane 16 8) (imul x y)))
(value_reg (pmullw (put_in_reg x) (put_in_reg_mem y))))
(rule (lower (has_type (multi_lane 32 4) (imul x y)))
(value_reg (pmulld (put_in_reg x) (put_in_reg_mem y))))
;; With AVX-512 we can implement `i64x2` multiplication with a single
;; instruction.
(rule (lower (has_type (and (avx512vl_enabled)
(avx512dq_enabled)
(multi_lane 64 2))
(imul x y)))
(value_reg (vpmullq (put_in_reg_mem x) (put_in_reg y))))
;; Otherwise, for i64x2 multiplication we describe a lane A as being composed of
;; a 32-bit upper half "Ah" and a 32-bit lower half "Al". The 32-bit long hand
;; multiplication can then be written as:
;;
;; Ah Al
;; * Bh Bl
;; -----
;; Al * Bl
;; + (Ah * Bl) << 32
;; + (Al * Bh) << 32
;;
;; So for each lane we will compute:
;;
;; A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32
;;
;; Note, the algorithm will use `pmuludq` (the code below uses the unsigned
;; form, not `pmuldq`) which operates directly on the lower 32-bit (`Al` or
;; `Bl`) of a lane and writes the result to the full 64-bits of the lane of
;; the destination. For this reason we don't need shifts to isolate the lower
;; 32-bits, however, we will need to use shifts to isolate the high 32-bits
;; when doing calculations, i.e., `Ah == A >> 32`. The `(Ah * Bh)` term only
;; affects bits at or above position 64 within the lane and is dropped.
(rule (lower (has_type (multi_lane 64 2)
(imul a b)))
(let ((a0 Reg (put_in_reg a))
(b0 Reg (put_in_reg b))
;; a_hi = A >> 32
(a_hi Reg (psrlq a0 (RegMemImm.Imm 32)))
;; ah_bl = Ah * Bl
(ah_bl Reg (pmuludq a_hi (RegMem.Reg b0)))
;; b_hi = B >> 32
(b_hi Reg (psrlq b0 (RegMemImm.Imm 32)))
;; al_bh = Al * Bh
(al_bh Reg (pmuludq a0 (RegMem.Reg b_hi)))
;; aa_bb = ah_bl + al_bh
(aa_bb Reg (paddq ah_bl (RegMem.Reg al_bh)))
;; aa_bb_shifted = aa_bb << 32
(aa_bb_shifted Reg (psllq aa_bb (RegMemImm.Imm 32)))
;; al_bl = Al * Bl
(al_bl Reg (pmuludq a0 (RegMem.Reg b0))))
;; al_bl + aa_bb_shifted
(value_reg (paddq al_bl (RegMem.Reg aa_bb_shifted)))))
;; Special case for `i16x8.extmul_high_i8x16_s`.
;; Strategy (see https://github.com/WebAssembly/simd/pull/376): move the high
;; 8 bytes of each input down into the low half (`palignr` of a register with
;; itself by 8), sign-extend those bytes to 16-bit words (`pmovsxbw`), then
;; multiply word-wise.
(rule (lower (has_type (multi_lane 16 8)
(imul (def_inst (swiden_high (and (value_type (multi_lane 8 16))
x)))
(def_inst (swiden_high (and (value_type (multi_lane 8 16))
y))))))
(let ((x1 Reg (put_in_reg x))
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
(x3 Reg (pmovsxbw (RegMem.Reg x2)))
(y1 Reg (put_in_reg y))
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
(y3 Reg (pmovsxbw (RegMem.Reg y2))))
(value_reg (pmullw x3 (RegMem.Reg y3)))))
;; Special case for `i32x4.extmul_high_i16x8_s`.
;; `pmullw`/`pmulhw` produce the low/high 16 bits of each signed 16x16
;; product; `punpckhwd` interleaves the upper four low/high pairs into full
;; 32-bit results.
(rule (lower (has_type (multi_lane 32 4)
(imul (def_inst (swiden_high (and (value_type (multi_lane 16 8))
x)))
(def_inst (swiden_high (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
;; Special case for `i64x2.extmul_high_i32x4_s`.
;; `pshufd` with immediate 0xFA selects lanes (2, 2, 3, 3), duplicating each
;; high 32-bit lane; `pmuldq` then multiplies the (signed) low 32 bits of
;; each 64-bit lane, yielding full 64-bit products.
(rule (lower (has_type (multi_lane 64 2)
(imul (def_inst (swiden_high (and (value_type (multi_lane 32 4))
x)))
(def_inst (swiden_high (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
0xFA
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
0xFA
(OperandSize.Size32))))
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
;; Special case for `i16x8.extmul_low_i8x16_s`.
;; The low 8 bytes are already in place, so just sign-extend them to 16-bit
;; words (`pmovsxbw`) and multiply word-wise.
(rule (lower (has_type (multi_lane 16 8)
(imul (def_inst (swiden_low (and (value_type (multi_lane 8 16))
x)))
(def_inst (swiden_low (and (value_type (multi_lane 8 16))
y))))))
(let ((x2 Reg (pmovsxbw (put_in_reg_mem x)))
(y2 Reg (pmovsxbw (put_in_reg_mem y))))
(value_reg (pmullw x2 (RegMem.Reg y2)))))
;; Special case for `i32x4.extmul_low_i16x8_s`.
;; `pmullw`/`pmulhw` produce the low/high 16 bits of each signed 16x16
;; product; `punpcklwd` interleaves the lower four low/high pairs into full
;; 32-bit results.
(rule (lower (has_type (multi_lane 32 4)
(imul (def_inst (swiden_low (and (value_type (multi_lane 16 8))
x)))
(def_inst (swiden_low (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
;; Special case for `i64x2.extmul_low_i32x4_s`.
;; `pshufd` with immediate 0x50 selects lanes (0, 0, 1, 1), duplicating each
;; low 32-bit lane; `pmuldq` then multiplies the (signed) low 32 bits of
;; each 64-bit lane, yielding full 64-bit products.
(rule (lower (has_type (multi_lane 64 2)
(imul (def_inst (swiden_low (and (value_type (multi_lane 32 4))
x)))
(def_inst (swiden_low (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
0x50
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
0x50
(OperandSize.Size32))))
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
;; Special case for `i16x8.extmul_high_i8x16_u`.
;; Same shape as the signed variant, but with zero-extension (`pmovzxbw`):
;; move the high 8 bytes down (`palignr` by 8), zero-extend to words, then
;; multiply word-wise.
(rule (lower (has_type (multi_lane 16 8)
(imul (def_inst (uwiden_high (and (value_type (multi_lane 8 16))
x)))
(def_inst (uwiden_high (and (value_type (multi_lane 8 16))
y))))))
(let ((x1 Reg (put_in_reg x))
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
(x3 Reg (pmovzxbw (RegMem.Reg x2)))
(y1 Reg (put_in_reg y))
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
(y3 Reg (pmovzxbw (RegMem.Reg y2))))
(value_reg (pmullw x3 (RegMem.Reg y3)))))
;; Special case for `i32x4.extmul_high_i16x8_u`.
;; `pmullw`/`pmulhuw` produce the low/high 16 bits of each unsigned 16x16
;; product; `punpckhwd` interleaves the upper four low/high pairs into full
;; 32-bit results.
(rule (lower (has_type (multi_lane 32 4)
(imul (def_inst (uwiden_high (and (value_type (multi_lane 16 8))
x)))
(def_inst (uwiden_high (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
;; Special case for `i64x2.extmul_high_i32x4_u`.
;; `pshufd` with immediate 0xFA selects lanes (2, 2, 3, 3), duplicating each
;; high 32-bit lane; `pmuludq` then multiplies the (unsigned) low 32 bits of
;; each 64-bit lane, yielding full 64-bit products.
(rule (lower (has_type (multi_lane 64 2)
(imul (def_inst (uwiden_high (and (value_type (multi_lane 32 4))
x)))
(def_inst (uwiden_high (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
0xFA
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
0xFA
(OperandSize.Size32))))
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
;; Special case for `i16x8.extmul_low_i8x16_u`.
;; The low 8 bytes are already in place, so just zero-extend them to 16-bit
;; words (`pmovzxbw`) and multiply word-wise.
(rule (lower (has_type (multi_lane 16 8)
(imul (def_inst (uwiden_low (and (value_type (multi_lane 8 16))
x)))
(def_inst (uwiden_low (and (value_type (multi_lane 8 16))
y))))))
(let ((x2 Reg (pmovzxbw (put_in_reg_mem x)))
(y2 Reg (pmovzxbw (put_in_reg_mem y))))
(value_reg (pmullw x2 (RegMem.Reg y2)))))
;; Special case for `i32x4.extmul_low_i16x8_u`.
;; `pmullw`/`pmulhuw` produce the low/high 16 bits of each unsigned 16x16
;; product; `punpcklwd` interleaves the lower four low/high pairs into full
;; 32-bit results.
(rule (lower (has_type (multi_lane 32 4)
(imul (def_inst (uwiden_low (and (value_type (multi_lane 16 8))
x)))
(def_inst (uwiden_low (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
;; Special case for `i64x2.extmul_low_i32x4_u`.
;; `pshufd` with immediate 0x50 selects lanes (0, 0, 1, 1), duplicating each
;; low 32-bit lane; `pmuludq` then multiplies the (unsigned) low 32 bits of
;; each 64-bit lane, yielding full 64-bit products.
(rule (lower (has_type (multi_lane 64 2)
(imul (def_inst (uwiden_low (and (value_type (multi_lane 32 4))
x)))
(def_inst (uwiden_low (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
0x50
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
0x50
(OperandSize.Size32))))
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Note the flipping of operands below. CLIF specifies
;;
;; band_not(x, y) = and(x, not(y))
;;
;; while x86 does
;;
;; pandn(x, y) = and(not(x), y)
;;
;; so passing `y` as the (negated) first operand yields `~y & x`, which is
;; exactly `x & ~y`. Float vectors use the float-domain forms
;; (`andnps`/`andnpd`); everything else uses integer `pandn`.
(rule (lower (has_type $F32X4 (band_not x y)))
(value_reg (andnps (put_in_reg y) (put_in_reg_mem x))))
(rule (lower (has_type $F64X2 (band_not x y)))
(value_reg (andnpd (put_in_reg y) (put_in_reg_mem x))))
(rule (lower (has_type (multi_lane _bits _lanes) (band_not x y)))
(value_reg (pandn (put_in_reg y) (put_in_reg_mem x))))

View File

@@ -1,5 +1,8 @@
//! Lowering rules for X64.
// ISLE integration glue.
mod isle;
use crate::data_value::DataValue;
use crate::ir::{
condcodes::{CondCode, FloatCC, IntCC},
@@ -1497,20 +1500,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
None
};
match op {
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
let value = ctx
.get_constant(insn)
.expect("constant value for iconst et al");
let dst = get_output_reg(ctx, outputs[0]);
for inst in Inst::gen_constant(dst, value as u128, ty.unwrap(), |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
}) {
ctx.emit(inst);
}
}
if let Ok(()) = isle::lower(ctx, isa_flags, &outputs, insn) {
return Ok(());
}
Opcode::Iadd
match op {
Opcode::Iconst
| Opcode::Bconst
| Opcode::Null
| Opcode::Iadd
| Opcode::IaddIfcout
| Opcode::SaddSat
| Opcode::UaddSat
@@ -1520,755 +1518,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::AvgRound
| Opcode::Band
| Opcode::Bor
| Opcode::Bxor => {
let ty = ty.unwrap();
if ty.lane_count() > 1 {
let sse_op = match op {
Opcode::Iadd => match ty {
types::I8X16 => SseOpcode::Paddb,
types::I16X8 => SseOpcode::Paddw,
types::I32X4 => SseOpcode::Paddd,
types::I64X2 => SseOpcode::Paddq,
_ => panic!("Unsupported type for packed iadd instruction: {}", ty),
},
Opcode::SaddSat => match ty {
types::I8X16 => SseOpcode::Paddsb,
types::I16X8 => SseOpcode::Paddsw,
_ => panic!("Unsupported type for packed sadd_sat instruction: {}", ty),
},
Opcode::UaddSat => match ty {
types::I8X16 => SseOpcode::Paddusb,
types::I16X8 => SseOpcode::Paddusw,
_ => panic!("Unsupported type for packed uadd_sat instruction: {}", ty),
},
Opcode::Isub => match ty {
types::I8X16 => SseOpcode::Psubb,
types::I16X8 => SseOpcode::Psubw,
types::I32X4 => SseOpcode::Psubd,
types::I64X2 => SseOpcode::Psubq,
_ => panic!("Unsupported type for packed isub instruction: {}", ty),
},
Opcode::SsubSat => match ty {
types::I8X16 => SseOpcode::Psubsb,
types::I16X8 => SseOpcode::Psubsw,
_ => panic!("Unsupported type for packed ssub_sat instruction: {}", ty),
},
Opcode::UsubSat => match ty {
types::I8X16 => SseOpcode::Psubusb,
types::I16X8 => SseOpcode::Psubusw,
_ => panic!("Unsupported type for packed usub_sat instruction: {}", ty),
},
Opcode::AvgRound => match ty {
types::I8X16 => SseOpcode::Pavgb,
types::I16X8 => SseOpcode::Pavgw,
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
},
Opcode::Band => match ty {
types::F32X4 => SseOpcode::Andps,
types::F64X2 => SseOpcode::Andpd,
_ => SseOpcode::Pand,
},
Opcode::Bor => match ty {
types::F32X4 => SseOpcode::Orps,
types::F64X2 => SseOpcode::Orpd,
_ => SseOpcode::Por,
},
Opcode::Bxor => match ty {
types::F32X4 => SseOpcode::Xorps,
types::F64X2 => SseOpcode::Xorpd,
_ => SseOpcode::Pxor,
},
_ => panic!("Unsupported packed instruction: {}", op),
};
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
// Move the `lhs` to the same register as `dst`.
ctx.emit(Inst::gen_move(dst, lhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
} else if ty == types::I128 || ty == types::B128 {
let alu_ops = match op {
Opcode::Iadd => (AluRmiROpcode::Add, AluRmiROpcode::Adc),
Opcode::Isub => (AluRmiROpcode::Sub, AluRmiROpcode::Sbb),
Opcode::Band => (AluRmiROpcode::And, AluRmiROpcode::And),
Opcode::Bor => (AluRmiROpcode::Or, AluRmiROpcode::Or),
Opcode::Bxor => (AluRmiROpcode::Xor, AluRmiROpcode::Xor),
_ => panic!("Unsupported opcode with 128-bit integers: {:?}", op),
};
let lhs = put_input_in_regs(ctx, inputs[0]);
let rhs = put_input_in_regs(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]);
assert_eq!(lhs.len(), 2);
assert_eq!(rhs.len(), 2);
assert_eq!(dst.len(), 2);
// For add, sub, and, or, xor: just do ops on lower then upper
// half. Carry-flag propagation is implicit (add/adc, sub/sbb).
ctx.emit(Inst::gen_move(dst.regs()[0], lhs.regs()[0], types::I64));
ctx.emit(Inst::gen_move(dst.regs()[1], lhs.regs()[1], types::I64));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
alu_ops.0,
RegMemImm::reg(rhs.regs()[0]),
dst.regs()[0],
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
alu_ops.1,
RegMemImm::reg(rhs.regs()[1]),
dst.regs()[1],
));
} else {
let size = if ty == types::I64 {
OperandSize::Size64
} else {
OperandSize::Size32
};
let alu_op = match op {
Opcode::Iadd | Opcode::IaddIfcout => AluRmiROpcode::Add,
Opcode::Isub => AluRmiROpcode::Sub,
Opcode::Band => AluRmiROpcode::And,
Opcode::Bor => AluRmiROpcode::Or,
Opcode::Bxor => AluRmiROpcode::Xor,
_ => unreachable!(),
};
let (lhs, rhs) = match op {
Opcode::Iadd
| Opcode::IaddIfcout
| Opcode::Band
| Opcode::Bor
| Opcode::Bxor => {
// For commutative operations, try to commute operands if one is an
// immediate or direct memory reference. Do so by converting LHS to RMI; if
// reg, then always convert RHS to RMI; else, use LHS as RMI and convert
// RHS to reg.
let lhs = input_to_reg_mem_imm(ctx, inputs[0]);
if let RegMemImm::Reg { reg: lhs_reg } = lhs {
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
(lhs_reg, rhs)
} else {
let rhs_reg = put_input_in_reg(ctx, inputs[1]);
(rhs_reg, lhs)
}
}
Opcode::Isub => (
put_input_in_reg(ctx, inputs[0]),
input_to_reg_mem_imm(ctx, inputs[1]),
),
_ => unreachable!(),
};
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst));
ctx.emit(Inst::alu_rmi_r(size, alu_op, rhs, dst));
}
}
Opcode::Imul => {
let ty = ty.unwrap();
// Check for ext_mul_* instructions which are being shared here under imul. We must
// check first for operands that are opcodes since checking for types is not enough.
if let Some(_) = matches_input_any(
ctx,
inputs[0],
&[
Opcode::SwidenHigh,
Opcode::SwidenLow,
Opcode::UwidenHigh,
Opcode::UwidenLow,
],
) {
// Optimized ext_mul_* lowerings are based on optimized lowerings
// here: https://github.com/WebAssembly/simd/pull/376
if let Some(swiden0_high) = matches_input(ctx, inputs[0], Opcode::SwidenHigh) {
if let Some(swiden1_high) = matches_input(ctx, inputs[1], Opcode::SwidenHigh) {
let swiden_input = &[
InsnInput {
insn: swiden0_high,
input: 0,
},
InsnInput {
insn: swiden1_high,
input: 0,
},
];
let input0_ty = ctx.input_ty(swiden0_high, 0);
let input1_ty = ctx.input_ty(swiden1_high, 0);
let output_ty = ctx.output_ty(insn, 0);
let lhs = put_input_in_reg(ctx, swiden_input[0]);
let rhs = put_input_in_reg(ctx, swiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
match (input0_ty, input1_ty, output_ty) {
(types::I8X16, types::I8X16, types::I16X8) => {
// i16x8.extmul_high_i8x16_s
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Palignr,
RegMem::reg(lhs),
Writable::from_reg(lhs),
8,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovsxbw,
RegMem::reg(lhs),
Writable::from_reg(lhs),
));
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Palignr,
RegMem::reg(rhs),
dst,
8,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovsxbw,
RegMem::reg(dst.to_reg()),
dst,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
}
(types::I16X8, types::I16X8, types::I32X4) => {
// i32x4.extmul_high_i16x8_s
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmulhw,
RegMem::reg(rhs),
tmp_reg,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Punpckhwd,
RegMem::from(tmp_reg),
dst,
));
}
(types::I32X4, types::I32X4, types::I64X2) => {
// i64x2.extmul_high_i32x4_s
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(lhs),
tmp_reg,
0xFA,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(rhs),
dst,
0xFA,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmuldq,
RegMem::reg(tmp_reg.to_reg()),
dst,
));
}
// Note swiden_high only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_low_signed type"),
}
}
} else if let Some(swiden0_low) = matches_input(ctx, inputs[0], Opcode::SwidenLow) {
if let Some(swiden1_low) = matches_input(ctx, inputs[1], Opcode::SwidenLow) {
let swiden_input = &[
InsnInput {
insn: swiden0_low,
input: 0,
},
InsnInput {
insn: swiden1_low,
input: 0,
},
];
let input0_ty = ctx.input_ty(swiden0_low, 0);
let input1_ty = ctx.input_ty(swiden1_low, 0);
let output_ty = ctx.output_ty(insn, 0);
let lhs = put_input_in_reg(ctx, swiden_input[0]);
let rhs = put_input_in_reg(ctx, swiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
match (input0_ty, input1_ty, output_ty) {
(types::I8X16, types::I8X16, types::I16X8) => {
// i32x4.extmul_low_i8x16_s
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovsxbw,
RegMem::reg(lhs),
tmp_reg,
));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmullw,
RegMem::reg(tmp_reg.to_reg()),
dst,
));
}
(types::I16X8, types::I16X8, types::I32X4) => {
// i32x4.extmul_low_i16x8_s
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmulhw,
RegMem::reg(rhs),
tmp_reg,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Punpcklwd,
RegMem::from(tmp_reg),
dst,
));
}
(types::I32X4, types::I32X4, types::I64X2) => {
// i64x2.extmul_low_i32x4_s
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(lhs),
tmp_reg,
0x50,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(rhs),
dst,
0x50,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmuldq,
RegMem::reg(tmp_reg.to_reg()),
dst,
));
}
// Note swiden_low only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_low_signed type"),
}
}
} else if let Some(uwiden0_high) = matches_input(ctx, inputs[0], Opcode::UwidenHigh)
{
if let Some(uwiden1_high) = matches_input(ctx, inputs[1], Opcode::UwidenHigh) {
let uwiden_input = &[
InsnInput {
insn: uwiden0_high,
input: 0,
},
InsnInput {
insn: uwiden1_high,
input: 0,
},
];
let input0_ty = ctx.input_ty(uwiden0_high, 0);
let input1_ty = ctx.input_ty(uwiden1_high, 0);
let output_ty = ctx.output_ty(insn, 0);
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
match (input0_ty, input1_ty, output_ty) {
(types::I8X16, types::I8X16, types::I16X8) => {
// i16x8.extmul_high_i8x16_u
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Palignr,
RegMem::reg(lhs),
Writable::from_reg(lhs),
8,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovzxbw,
RegMem::reg(lhs),
Writable::from_reg(lhs),
));
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Palignr,
RegMem::reg(rhs),
dst,
8,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovzxbw,
RegMem::reg(dst.to_reg()),
dst,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(lhs), dst));
}
(types::I16X8, types::I16X8, types::I32X4) => {
// i32x4.extmul_high_i16x8_u
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmulhuw,
RegMem::reg(rhs),
tmp_reg,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Punpckhwd,
RegMem::from(tmp_reg),
dst,
));
}
(types::I32X4, types::I32X4, types::I64X2) => {
// i64x2.extmul_high_i32x4_u
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(lhs),
tmp_reg,
0xFA,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(rhs),
dst,
0xFA,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmuludq,
RegMem::reg(tmp_reg.to_reg()),
dst,
));
}
// Note uwiden_high only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_high_unsigned type"),
}
}
} else if let Some(uwiden0_low) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
if let Some(uwiden1_low) = matches_input(ctx, inputs[1], Opcode::UwidenLow) {
let uwiden_input = &[
InsnInput {
insn: uwiden0_low,
input: 0,
},
InsnInput {
insn: uwiden1_low,
input: 0,
},
];
let input0_ty = ctx.input_ty(uwiden0_low, 0);
let input1_ty = ctx.input_ty(uwiden1_low, 0);
let output_ty = ctx.output_ty(insn, 0);
let lhs = put_input_in_reg(ctx, uwiden_input[0]);
let rhs = put_input_in_reg(ctx, uwiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
match (input0_ty, input1_ty, output_ty) {
(types::I8X16, types::I8X16, types::I16X8) => {
// i16x8.extmul_low_i8x16_u
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::xmm_mov(
SseOpcode::Pmovzxbw,
RegMem::reg(lhs),
tmp_reg,
));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmullw,
RegMem::reg(tmp_reg.to_reg()),
dst,
));
}
(types::I16X8, types::I16X8, types::I32X4) => {
// i32x4.extmul_low_i16x8_u
ctx.emit(Inst::gen_move(dst, lhs, input0_ty));
let tmp_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp_reg, lhs, input0_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(rhs), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmulhuw,
RegMem::reg(rhs),
tmp_reg,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Punpcklwd,
RegMem::from(tmp_reg),
dst,
));
}
(types::I32X4, types::I32X4, types::I64X2) => {
// i64x2.extmul_low_i32x4_u
let tmp_reg = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(lhs),
tmp_reg,
0x50,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd,
RegMem::reg(rhs),
dst,
0x50,
OperandSize::Size32,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmuludq,
RegMem::reg(tmp_reg.to_reg()),
dst,
));
}
// Note uwiden_low only allows types: I8X16, I16X8, and I32X4
_ => panic!("Unsupported extmul_low_unsigned type"),
}
}
} else {
panic!("Unsupported imul operation for type: {}", ty);
}
} else if ty == types::I64X2 {
// Eventually one of these should be `input_to_reg_mem` (TODO).
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512dq_simd() {
// With the right AVX512 features (VL + DQ) this operation
// can lower to a single operation.
ctx.emit(Inst::xmm_rm_r_evex(
Avx512Opcode::Vpmullq,
RegMem::reg(rhs),
lhs,
dst,
));
} else {
// Otherwise, for I64X2 multiplication we describe a lane A as being
// composed of a 32-bit upper half "Ah" and a 32-bit lower half
// "Al". The 32-bit long hand multiplication can then be written
// as:
// Ah Al
// * Bh Bl
// -----
// Al * Bl
// + (Ah * Bl) << 32
// + (Al * Bh) << 32
//
// So for each lane we will compute:
// A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32
//
// Note, the algorithm will use pmuldq which operates directly
// on the lower 32-bit (Al or Bl) of a lane and writes the
// result to the full 64-bits of the lane of the destination.
// For this reason we don't need shifts to isolate the lower
// 32-bits, however, we will need to use shifts to isolate the
// high 32-bits when doing calculations, i.e., Ah == A >> 32.
//
// The full sequence then is as follows:
// A' = A
// A' = A' >> 32
// A' = Ah' * Bl
// B' = B
// B' = B' >> 32
// B' = Bh' * Al
// B' = B' + A'
// B' = B' << 32
// A' = A
// A' = Al' * Bl
// A' = A' + B'
// dst = A'
// A' = A
let rhs_1 = ctx.alloc_tmp(types::I64X2).only_reg().unwrap();
ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
// A' = A' >> 32
// A' = Ah' * Bl
ctx.emit(Inst::xmm_rmi_reg(
SseOpcode::Psrlq,
RegMemImm::imm(32),
rhs_1,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmuludq,
RegMem::reg(lhs.clone()),
rhs_1,
));
// B' = B
let lhs_1 = ctx.alloc_tmp(types::I64X2).only_reg().unwrap();
ctx.emit(Inst::gen_move(lhs_1, lhs, ty));
// B' = B' >> 32
// B' = Bh' * Al
ctx.emit(Inst::xmm_rmi_reg(
SseOpcode::Psrlq,
RegMemImm::imm(32),
lhs_1,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(rhs), lhs_1));
// B' = B' + A'
// B' = B' << 32
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddq,
RegMem::reg(rhs_1.to_reg()),
lhs_1,
));
ctx.emit(Inst::xmm_rmi_reg(
SseOpcode::Psllq,
RegMemImm::imm(32),
lhs_1,
));
// A' = A
// A' = Al' * Bl
// A' = A' + B'
// dst = A'
ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmuludq,
RegMem::reg(lhs.clone()),
rhs_1,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddq,
RegMem::reg(lhs_1.to_reg()),
rhs_1,
));
ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty));
}
} else if ty.lane_count() > 1 {
// Emit single instruction lowerings for the remaining vector
// multiplications.
let sse_op = match ty {
types::I16X8 => SseOpcode::Pmullw,
types::I32X4 => SseOpcode::Pmulld,
_ => panic!("Unsupported type for packed imul instruction: {}", ty),
};
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
// Move the `lhs` to the same register as `dst`.
ctx.emit(Inst::gen_move(dst, lhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
} else if ty == types::I128 || ty == types::B128 {
// Handle 128-bit multiplications.
let lhs = put_input_in_regs(ctx, inputs[0]);
let rhs = put_input_in_regs(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]);
assert_eq!(lhs.len(), 2);
assert_eq!(rhs.len(), 2);
assert_eq!(dst.len(), 2);
// mul:
// dst_lo = lhs_lo * rhs_lo
// dst_hi = umulhi(lhs_lo, rhs_lo) + lhs_lo * rhs_hi + lhs_hi * rhs_lo
//
// so we emit:
// mov dst_lo, lhs_lo
// mul dst_lo, rhs_lo
// mov dst_hi, lhs_lo
// mul dst_hi, rhs_hi
// mov tmp, lhs_hi
// mul tmp, rhs_lo
// add dst_hi, tmp
// mov rax, lhs_lo
// umulhi rhs_lo // implicit rax arg/dst
// add dst_hi, rax
let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(dst.regs()[0], lhs.regs()[0], types::I64));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Mul,
RegMemImm::reg(rhs.regs()[0]),
dst.regs()[0],
));
ctx.emit(Inst::gen_move(dst.regs()[1], lhs.regs()[0], types::I64));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Mul,
RegMemImm::reg(rhs.regs()[1]),
dst.regs()[1],
));
ctx.emit(Inst::gen_move(tmp, lhs.regs()[1], types::I64));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Mul,
RegMemImm::reg(rhs.regs()[0]),
tmp,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::reg(tmp.to_reg()),
dst.regs()[1],
));
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rax()),
lhs.regs()[0],
types::I64,
));
ctx.emit(Inst::mul_hi(
OperandSize::Size64,
/* signed = */ false,
RegMem::reg(rhs.regs()[0]),
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::reg(regs::rdx()),
dst.regs()[1],
));
} else {
let size = if ty == types::I64 {
OperandSize::Size64
} else {
OperandSize::Size32
};
let alu_op = AluRmiROpcode::Mul;
// For commutative operations, try to commute operands if one is
// an immediate or direct memory reference. Do so by converting
// LHS to RMI; if reg, then always convert RHS to RMI; else, use
// LHS as RMI and convert RHS to reg.
let lhs = input_to_reg_mem_imm(ctx, inputs[0]);
let (lhs, rhs) = if let RegMemImm::Reg { reg: lhs_reg } = lhs {
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
(lhs_reg, rhs)
} else {
let rhs_reg = put_input_in_reg(ctx, inputs[1]);
(rhs_reg, lhs)
};
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst));
ctx.emit(Inst::alu_rmi_r(size, alu_op, rhs, dst));
}
}
Opcode::BandNot => {
let ty = ty.unwrap();
debug_assert!(ty.is_vector() && ty.bytes() == 16);
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let sse_op = match ty {
types::F32X4 => SseOpcode::Andnps,
types::F64X2 => SseOpcode::Andnpd,
_ => SseOpcode::Pandn,
};
// Note the flipping of operands: the `rhs` operand is used as the destination instead
// of the `lhs` as in the other bit operations above (e.g. `band`).
ctx.emit(Inst::gen_move(dst, rhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
| Opcode::Bxor
| Opcode::Imul
| Opcode::BandNot => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}
Opcode::Iabs => {
@@ -5801,7 +5058,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Now the AtomicRmwSeq (pseudo-) instruction itself
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
ctx.emit(Inst::AtomicRmwSeq { ty: ty_access, op });
ctx.emit(Inst::AtomicRmwSeq {
ty: ty_access,
op,
address: regs::r9(),
operand: regs::r10(),
temp: Writable::from_reg(regs::r11()),
dst_old: Writable::from_reg(regs::rax()),
});
// And finally, copy the preordained AtomicRmwSeq output reg to its destination.
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
@@ -5827,8 +5091,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
));
ctx.emit(Inst::LockCmpxchg {
ty: ty_access,
src: replacement,
dst: addr.into(),
mem: addr.into(),
replacement,
expected: regs::rax(),
dst_old: Writable::from_reg(regs::rax()),
});
// And finally, copy the old value at the location to its destination reg.
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));

View File

@@ -0,0 +1,414 @@
//! ISLE integration glue code for x64 lowering.
// Pull in the ISLE generated code.
mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
Writable,
};
use crate::isa::x64::inst::args::SyntheticAmode;
use crate::isa::x64::settings as x64_settings;
use crate::{
ir::{immediates::*, types::*, Inst, InstructionData, Opcode, Value, ValueList},
isa::x64::inst::{
args::{Avx512Opcode, CmpOpcode, ExtMode, Imm8Reg, RegMem, ShiftKind, SseOpcode, CC},
x64_map_regs, RegMapper,
},
machinst::{get_output_reg, InsnInput, InsnOutput, LowerCtx},
};
use smallvec::SmallVec;
use std::convert::TryFrom;
type Unit = ();
type ValueSlice<'a> = &'a [Value];
type ValueArray2 = [Value; 2];
type ValueArray3 = [Value; 3];
type WritableReg = Writable<Reg>;
type ValueRegs = crate::machinst::ValueRegs<Reg>;
/// A load instruction that has been matched as safe to sink into (merge with)
/// its use as a memory operand; see `sinkable_load` / `sink_load`.
pub struct SinkableLoad {
    // The IR load instruction; sunk via `LowerCtx::sink_inst` when committed.
    inst: Inst,
    // The input producing the load's address.
    addr_input: InsnInput,
    // Static byte offset added to the address.
    offset: i32,
}
/// A small register-renaming map, applied to buffered instructions before they
/// are handed to the lowering context (see `lower`).
#[derive(Default)]
struct RegRenamer {
    // Map of `(old, new)` register names. Use a `SmallVec` because we typically
    // only have one or two renamings.
    renames: SmallVec<[(Reg, Reg); 2]>,
}
impl RegRenamer {
    /// Record that occurrences of `old` should be rewritten to `new`.
    fn add_rename(&mut self, old: Reg, new: Reg) {
        self.renames.push((old, new));
    }

    /// Look up the replacement for `reg`, if one has been recorded. The first
    /// matching entry (in insertion order) wins.
    fn get_rename(&self, reg: Reg) -> Option<Reg> {
        for &(old, new) in self.renames.iter() {
            if old == reg {
                return Some(new);
            }
        }
        None
    }
}
// The same renaming applies regardless of whether the register appears as a
// use, a def, or a modify operand.
impl RegMapper for RegRenamer {
    fn get_use(&self, reg: Reg) -> Option<Reg> {
        self.get_rename(reg)
    }
    fn get_def(&self, reg: Reg) -> Option<Reg> {
        self.get_rename(reg)
    }
    fn get_mod(&self, reg: Reg) -> Option<Reg> {
        self.get_rename(reg)
    }
}
/// The main entry point for lowering with ISLE.
///
/// Returns `Err(())` when no ISLE rule matched `inst`, so the caller can fall
/// back to the handwritten lowering code.
pub(crate) fn lower<C>(
    lower_ctx: &mut C,
    isa_flags: &x64_settings::Flags,
    outputs: &[InsnOutput],
    inst: Inst,
) -> Result<(), ()>
where
    C: LowerCtx<I = MInst>,
{
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext::new(lower_ctx, isa_flags);
    let temp_regs = generated_code::constructor_lower(&mut isle_ctx, inst).ok_or(())?;
    let mut temp_regs = temp_regs.regs().iter();
    // The ISLE generated code emits its own registers to define the
    // instruction's lowered values in. We rename those registers to the
    // registers they were assigned when their value was used as an operand in
    // earlier lowerings.
    let mut renamer = RegRenamer::default();
    for output in outputs {
        let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
        // `by_ref` keeps a single shared cursor over `temp_regs` across
        // outputs, pairing each ISLE-produced temp with its preassigned
        // destination in order.
        for (temp, dst) in temp_regs.by_ref().zip(dsts.regs()) {
            renamer.add_rename(*temp, dst.to_reg());
        }
    }
    // Apply the renaming to every buffered instruction, then emit for real.
    for mut inst in isle_ctx.into_emitted_insts() {
        x64_map_regs(&mut inst, &renamer);
        lower_ctx.emit(inst);
    }
    Ok(())
}
/// Context handed to the ISLE-generated code: the underlying lowering context,
/// the x64 ISA flags, and a buffer of instructions produced by the rules.
pub struct IsleContext<'a, C> {
    lower_ctx: &'a mut C,
    isa_flags: &'a x64_settings::Flags,
    // Instructions are buffered here rather than emitted directly so that
    // `lower` can rewrite their registers before handing them to `lower_ctx`.
    emitted_insts: SmallVec<[MInst; 6]>,
}
impl<'a, C> IsleContext<'a, C> {
    /// Create a fresh ISLE lowering context with an empty instruction buffer.
    pub fn new(lower_ctx: &'a mut C, isa_flags: &'a x64_settings::Flags) -> Self {
        Self {
            lower_ctx,
            isa_flags,
            emitted_insts: SmallVec::default(),
        }
    }

    /// Consume this context, yielding the instructions buffered during
    /// lowering.
    pub fn into_emitted_insts(self) -> SmallVec<[MInst; 6]> {
        let Self { emitted_insts, .. } = self;
        emitted_insts
    }
}
// Implementations of the external constructors/extractors declared in the ISLE
// prelude; the generated code calls back into these through this trait.
impl<'a, C> generated_code::Context for IsleContext<'a, C>
where
    C: LowerCtx<I = MInst>,
{
    #[inline]
    fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
        let [a, b] = *arr;
        (a, b)
    }
    #[inline]
    fn pack_value_array_2(&mut self, a: Value, b: Value) -> ValueArray2 {
        [a, b]
    }
    #[inline]
    fn unpack_value_array_3(&mut self, arr: &ValueArray3) -> (Value, Value, Value) {
        let [a, b, c] = *arr;
        (a, b, c)
    }
    #[inline]
    fn pack_value_array_3(&mut self, a: Value, b: Value, c: Value) -> ValueArray3 {
        [a, b, c]
    }
    #[inline]
    fn value_reg(&mut self, reg: Reg) -> ValueRegs {
        ValueRegs::one(reg)
    }
    #[inline]
    fn value_regs(&mut self, r1: Reg, r2: Reg) -> ValueRegs {
        ValueRegs::two(r1, r2)
    }
    // Allocate a fresh temporary; panics if `ty` needs more than one register.
    #[inline]
    fn temp_writable_reg(&mut self, ty: Type) -> WritableReg {
        let value_regs = self.lower_ctx.alloc_tmp(ty);
        value_regs.only_reg().unwrap()
    }
    #[inline]
    fn invalid_reg(&mut self) -> Reg {
        Reg::invalid()
    }
    // Put `val` in a single register; panics if `val` requires multiple
    // registers (e.g. `i128` on x64) -- use `put_in_regs` for those.
    #[inline]
    fn put_in_reg(&mut self, val: Value) -> Reg {
        self.lower_ctx.put_value_in_regs(val).only_reg().unwrap()
    }
    #[inline]
    fn put_in_regs(&mut self, val: Value) -> ValueRegs {
        self.lower_ctx.put_value_in_regs(val)
    }
    // Panics if `i` is out of range for `regs`.
    #[inline]
    fn value_regs_get(&mut self, regs: ValueRegs, i: usize) -> Reg {
        regs.regs()[i]
    }
    #[inline]
    fn u8_as_u64(&mut self, x: u8) -> u64 {
        x.into()
    }
    #[inline]
    fn u16_as_u64(&mut self, x: u16) -> u64 {
        x.into()
    }
    #[inline]
    fn u32_as_u64(&mut self, x: u32) -> u64 {
        x.into()
    }
    #[inline]
    fn ty_bits(&mut self, ty: Type) -> u16 {
        ty.bits()
    }
    // Extractor: matches only types of at most 64 bits.
    #[inline]
    fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
        if ty.bits() <= 64 {
            Some(ty)
        } else {
            None
        }
    }
    #[inline]
    fn value_list_slice(&mut self, list: ValueList) -> ValueSlice {
        list.as_slice(&self.lower_ctx.dfg().value_lists)
    }
    #[inline]
    fn unwrap_head_value_list_1(&mut self, list: ValueList) -> (Value, ValueSlice) {
        match self.value_list_slice(list) {
            [head, tail @ ..] => (*head, tail),
            _ => out_of_line_panic("`unwrap_head_value_list_1` on empty `ValueList`"),
        }
    }
    #[inline]
    fn unwrap_head_value_list_2(&mut self, list: ValueList) -> (Value, Value, ValueSlice) {
        match self.value_list_slice(list) {
            [head1, head2, tail @ ..] => (*head1, *head2, tail),
            _ => out_of_line_panic(
                "`unwrap_head_value_list_2` on list without at least two elements",
            ),
        }
    }
    #[inline]
    fn writable_reg_to_reg(&mut self, r: WritableReg) -> Reg {
        r.to_reg()
    }
    #[inline]
    fn u64_from_imm64(&mut self, imm: Imm64) -> u64 {
        imm.bits() as u64
    }
    #[inline]
    fn inst_results(&mut self, inst: Inst) -> ValueSlice {
        self.lower_ctx.dfg().inst_results(inst)
    }
    #[inline]
    fn first_result(&mut self, inst: Inst) -> Option<Value> {
        self.lower_ctx.dfg().inst_results(inst).first().copied()
    }
    #[inline]
    fn inst_data(&mut self, inst: Inst) -> InstructionData {
        self.lower_ctx.dfg()[inst].clone()
    }
    #[inline]
    fn value_type(&mut self, val: Value) -> Type {
        self.lower_ctx.dfg().value_type(val)
    }
    // Extractor: matches only multi-lane (vector) types, yielding
    // `(bits per lane, lane count)`.
    #[inline]
    fn multi_lane(&mut self, ty: Type) -> Option<(u8, u16)> {
        if ty.lane_count() > 1 {
            Some((ty.lane_bits(), ty.lane_count()))
        } else {
            None
        }
    }
    #[inline]
    fn def_inst(&mut self, val: Value) -> Option<Inst> {
        self.lower_ctx.dfg().value_def(val).inst()
    }
    // Any type narrower than 64 bits is handled at 32-bit operand width.
    #[inline]
    fn operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }
    // Produce a `RegMem` operand for `val`, preferring (in order): a freshly
    // materialized constant, a sunk mergeable load, or a plain register.
    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);
        if let Some(c) = inputs.constant {
            // Generate constants fresh at each use to minimize long-range
            // register pressure.
            let ty = self.value_type(val);
            return RegMem::reg(generated_code::constructor_imm(self, ty, c).unwrap());
        }
        // Only the first result (index 0) of a load can be merged as a memory
        // operand; sinking marks the load so it is not lowered separately.
        if let Some((src_insn, 0)) = inputs.inst {
            if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, src_insn) {
                self.lower_ctx.sink_inst(src_insn);
                let amode = lower_to_amode(self.lower_ctx, addr_input, offset);
                return RegMem::mem(amode);
            }
        }
        RegMem::reg(self.put_in_reg(val))
    }
    // `Option<()>` stands in for a boolean predicate in ISLE extractors:
    // `Some(())` means the feature is enabled.
    #[inline]
    fn avx512vl_enabled(&mut self, _: Type) -> Option<()> {
        if self.isa_flags.use_avx512vl_simd() {
            Some(())
        } else {
            None
        }
    }
    #[inline]
    fn avx512dq_enabled(&mut self, _: Type) -> Option<()> {
        if self.isa_flags.use_avx512dq_simd() {
            Some(())
        } else {
            None
        }
    }
    // Match a value whose defining instruction has a constant result that fits
    // in a `u8` immediate.
    #[inline]
    fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant = self.lower_ctx.get_constant(inst)?;
        let imm = u8::try_from(constant).ok()?;
        Some(Imm8Reg::Imm8 { imm })
    }
    // Match a value whose defining instruction has a constant result that is
    // representable as a sign-extended 32-bit immediate.
    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<RegMemImm> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant: u64 = self.lower_ctx.get_constant(inst)?;
        let constant = constant as i64;
        to_simm32(constant)
    }
    #[inline]
    fn simm32_from_imm64(&mut self, imm: Imm64) -> Option<RegMemImm> {
        to_simm32(imm.bits())
    }
    // Like the load-merging in `put_in_reg_mem`, but split into a separate
    // match step (`sinkable_load`) and commit step (`sink_load`) so ISLE rules
    // can test for a sinkable load without committing to sinking it.
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        let input = self.lower_ctx.get_value_as_source_or_const(val);
        if let Some((inst, 0)) = input.inst {
            if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, inst) {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }
    fn sink_load(&mut self, load: &SinkableLoad) -> RegMemImm {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        RegMemImm::Mem {
            addr: SyntheticAmode::Real(addr),
        }
    }
    // Panics if `(from_bits, to_bits)` is not a valid x64 extension pair.
    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }
    // Buffer rather than emit directly; `lower` renames registers in the
    // buffered instructions before final emission. NOTE(review): assumes
    // `mov_mitosis` expands an instruction into its constituent moves --
    // confirm against `Inst::mov_mitosis`.
    fn emit(&mut self, inst: &MInst) -> Unit {
        for inst in inst.clone().mov_mitosis() {
            self.emitted_insts.push(inst);
        }
    }
    // NOTE(review): the strict `<` excludes `u32::MAX` itself even though it
    // does fit in a `u32`; confirm whether that exclusion is intentional.
    #[inline]
    fn nonzero_u64_fits_in_u32(&mut self, x: u64) -> Option<u64> {
        if x != 0 && x < u64::from(u32::MAX) {
            Some(x)
        } else {
            None
        }
    }
}
#[inline]
fn to_simm32(constant: i64) -> Option<RegMemImm> {
if constant == ((constant << 32) >> 32) {
Some(RegMemImm::Imm {
simm32: constant as u32,
})
} else {
None
}
}
/// Panic helper kept out of line (`inline(never)` + `cold`) so the many call
/// sites on the hot lowering path stay small; `track_caller` makes the panic
/// message report the caller's location instead of this function's.
#[inline(never)]
#[cold]
#[track_caller]
fn out_of_line_panic(msg: &str) -> ! {
    panic!("{}", msg);
}

File diff suppressed because it is too large Load Diff

View File

@@ -11,13 +11,14 @@ use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
use crate::ir::instructions::BranchInfo;
use crate::ir::{
ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData, Inst,
InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
ValueLabelAssignments, ValueLabelStart,
ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, ExternalName, Function,
GlobalValueData, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value,
ValueDef, ValueLabelAssignments, ValueLabelStart,
};
use crate::machinst::{
writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode,
VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst, ValueRegs,
non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
LoweredBlock, MachLabel, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
VCodeInst, ValueRegs,
};
use crate::CodegenResult;
use alloc::boxed::Box;
@@ -61,6 +62,8 @@ pub trait LowerCtx {
/// The instruction type for which this lowering framework is instantiated.
type I: VCodeInst;
fn dfg(&self) -> &DataFlowGraph;
// Function-level queries:
/// Get the `ABICallee`.
@@ -124,8 +127,12 @@ pub trait LowerCtx {
/// instruction's result(s) must have *no* uses remaining, because it will
/// not be codegen'd (it has been integrated into the current instruction).
fn get_input_as_source_or_const(&self, ir_inst: Inst, idx: usize) -> NonRegInput;
/// Like `get_input_as_source_or_const` but with a `Value`.
fn get_value_as_source_or_const(&self, value: Value) -> NonRegInput;
/// Put the `idx`th input into register(s) and return the assigned register.
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg>;
/// Put the given value into register(s) and return the assigned register.
fn put_value_in_regs(&mut self, value: Value) -> ValueRegs<Reg>;
/// Get the `idx`th output register(s) of the given IR instruction. When
/// `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected that
/// the backend will write results to these output register(s). This
@@ -1002,101 +1009,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
Ok((vcode, stack_map_info))
}
fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
log::trace!("put_value_in_reg: val {}", val);
let mut regs = self.value_regs[val];
log::trace!(" -> regs {:?}", regs);
assert!(regs.is_valid());
self.value_lowered_uses[val] += 1;
// Pinned-reg hack: if backend specifies a fixed pinned register, use it
// directly when we encounter a GetPinnedReg op, rather than lowering
// the actual op, and do not return the source inst to the caller; the
// value comes "out of the ether" and we will not force generation of
// the superfluous move.
if let ValueDef::Result(i, 0) = self.f.dfg.value_def(val) {
if self.f.dfg[i].opcode() == Opcode::GetPinnedReg {
if let Some(pr) = self.pinned_reg {
regs = ValueRegs::one(pr);
}
}
}
regs
}
/// Get the actual inputs for a value. This is the implementation for
/// `get_input()` but starting from the SSA value, which is not exposed to
/// the backend.
fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
log::trace!(
"get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
val,
self.cur_inst,
self.cur_scan_entry_color,
);
let inst = match self.f.dfg.value_def(val) {
// OK to merge source instruction if (i) we have a source
// instruction, and:
// - It has no side-effects, OR
// - It has a side-effect, has one output value, that one output has
// only one use (this one), and the instruction's color is *one less
// than* the current scan color.
//
// This latter set of conditions is testing whether a
// side-effecting instruction can sink to the current scan
// location; this is possible if the in-color of this inst is
// equal to the out-color of the producing inst, so no other
// side-effecting ops occur between them (which will only be true
// if they are in the same BB, because color increments at each BB
// start).
//
// If it is actually sunk, then in `merge_inst()`, we update the
// scan color so that as we scan over the range past which the
// instruction was sunk, we allow other instructions (that came
// prior to the sunk instruction) to sink.
ValueDef::Result(src_inst, result_idx) => {
let src_side_effect = has_lowering_side_effect(self.f, src_inst);
log::trace!(" -> src inst {}", src_inst);
log::trace!(" -> has lowering side effect: {}", src_side_effect);
if !src_side_effect {
// Pure instruction: always possible to sink.
Some((src_inst, result_idx))
} else {
// Side-effect: test whether this is the only use of the
// only result of the instruction, and whether colors allow
// the code-motion.
if self.cur_scan_entry_color.is_some()
&& self.value_uses[val] == 1
&& self.value_lowered_uses[val] == 0
&& self.num_outputs(src_inst) == 1
&& self
.side_effect_inst_entry_colors
.get(&src_inst)
.unwrap()
.get()
+ 1
== self.cur_scan_entry_color.unwrap().get()
{
Some((src_inst, 0))
} else {
None
}
}
}
_ => None,
};
let constant = inst.and_then(|(inst, _)| self.get_constant(inst));
NonRegInput { inst, constant }
}
}
impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
type I = I;
fn dfg(&self) -> &DataFlowGraph {
&self.f.dfg
}
fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
self.vcode.abi()
}
@@ -1207,12 +1128,124 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
self.get_value_as_source_or_const(val)
}
fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
log::trace!(
"get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
val,
self.cur_inst,
self.cur_scan_entry_color,
);
let inst = match self.f.dfg.value_def(val) {
// OK to merge source instruction if (i) we have a source
// instruction, and:
// - It has no side-effects, OR
// - It has a side-effect, has one output value, that one output has
// only one use (this one), and the instruction's color is *one less
// than* the current scan color.
//
// This latter set of conditions is testing whether a
// side-effecting instruction can sink to the current scan
// location; this is possible if the in-color of this inst is
// equal to the out-color of the producing inst, so no other
// side-effecting ops occur between them (which will only be true
// if they are in the same BB, because color increments at each BB
// start).
//
// If it is actually sunk, then in `merge_inst()`, we update the
// scan color so that as we scan over the range past which the
// instruction was sunk, we allow other instructions (that came
// prior to the sunk instruction) to sink.
ValueDef::Result(src_inst, result_idx) => {
let src_side_effect = has_lowering_side_effect(self.f, src_inst);
log::trace!(" -> src inst {}", src_inst);
log::trace!(" -> has lowering side effect: {}", src_side_effect);
if !src_side_effect {
// Pure instruction: always possible to sink.
Some((src_inst, result_idx))
} else {
// Side-effect: test whether this is the only use of the
// only result of the instruction, and whether colors allow
// the code-motion.
if self.cur_scan_entry_color.is_some()
&& self.value_uses[val] == 1
&& self.value_lowered_uses[val] == 0
&& self.num_outputs(src_inst) == 1
&& self
.side_effect_inst_entry_colors
.get(&src_inst)
.unwrap()
.get()
+ 1
== self.cur_scan_entry_color.unwrap().get()
{
Some((src_inst, 0))
} else {
None
}
}
}
_ => None,
};
let constant = inst.and_then(|(inst, _)| self.get_constant(inst));
NonRegInput { inst, constant }
}
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg> {
let val = self.f.dfg.inst_args(ir_inst)[idx];
let val = self.f.dfg.resolve_aliases(val);
self.put_value_in_regs(val)
}
fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
let val = self.f.dfg.resolve_aliases(val);
log::trace!("put_value_in_regs: val {}", val);
// If the value is a constant, then (re)materialize it at each use. This
// lowers register pressure.
if let Some(c) = self
.f
.dfg
.value_def(val)
.inst()
.and_then(|inst| self.get_constant(inst))
{
let ty = self.f.dfg.value_type(val);
let regs = self.alloc_tmp(ty);
log::trace!(" -> regs {:?}", regs);
assert!(regs.is_valid());
let insts = I::gen_constant(regs, c.into(), ty, |ty| {
self.alloc_tmp(ty).only_reg().unwrap()
});
for inst in insts {
self.emit(inst);
}
return non_writable_value_regs(regs);
}
let mut regs = self.value_regs[val];
log::trace!(" -> regs {:?}", regs);
assert!(regs.is_valid());
self.value_lowered_uses[val] += 1;
// Pinned-reg hack: if backend specifies a fixed pinned register, use it
// directly when we encounter a GetPinnedReg op, rather than lowering
// the actual op, and do not return the source inst to the caller; the
// value comes "out of the ether" and we will not force generation of
// the superfluous move.
if let ValueDef::Result(i, 0) = self.f.dfg.value_def(val) {
if self.f.dfg[i].opcode() == Opcode::GetPinnedReg {
if let Some(pr) = self.pinned_reg {
regs = ValueRegs::one(pr);
}
}
}
regs
}
fn get_output(&self, ir_inst: Inst, idx: usize) -> ValueRegs<Writable<Reg>> {
let val = self.f.dfg.inst_results(ir_inst)[idx];
writable_value_regs(self.value_regs[val])

View File

@@ -0,0 +1,202 @@
;; This is a prelude of standard definitions for ISLE, the instruction-selector
;; DSL, as we use it bound to our interfaces.
;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `()`
(type Unit (primitive Unit))
;; `bool` is declared in `clif.isle`.
(extern const $true bool)
(extern const $false bool)
(type u8 (primitive u8))
(type u16 (primitive u16))
(type u32 (primitive u32))
(type u64 (primitive u64))
(type u128 (primitive u128))
(type usize (primitive usize))
(type i8 (primitive i8))
(type i16 (primitive i16))
(type i32 (primitive i32))
(type i64 (primitive i64))
(type i128 (primitive i128))
(type isize (primitive isize))
;; `cranelift-entity`-based identifiers.
(type Inst (primitive Inst))
(type Type (primitive Type))
(type Value (primitive Value))
;; ISLE representation of `&[Value]`.
(type ValueSlice (primitive ValueSlice))
(type ValueList (primitive ValueList))
(type ValueRegs (primitive ValueRegs))
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type Reg (primitive Reg))
(type WritableReg (primitive WritableReg))
;; Construct a `ValueRegs` of one register.
(decl value_reg (Reg) ValueRegs)
(extern constructor value_reg value_reg)
;; Construct a `ValueRegs` of two registers.
(decl value_regs (Reg Reg) ValueRegs)
(extern constructor value_regs value_regs)
;; Get a temporary register for writing.
(decl temp_writable_reg (Type) WritableReg)
(extern constructor temp_writable_reg temp_writable_reg)
;; Get a temporary register for reading.
(decl temp_reg (Type) Reg)
(rule (temp_reg ty)
(writable_reg_to_reg (temp_writable_reg ty)))
;; Get the invalid register.
(decl invalid_reg () Reg)
(extern constructor invalid_reg invalid_reg)
;; Put the given value into a register.
;;
;; Asserts that the value fits into a single register, and doesn't require
;; multiple registers for its representation (like `i128` on x64 for example).
;;
;; As a side effect, this marks the value as used.
(decl put_in_reg (Value) Reg)
(extern constructor put_in_reg put_in_reg)
;; Put the given value into one or more registers.
;;
;; As a side effect, this marks the value as used.
(decl put_in_regs (Value) ValueRegs)
(extern constructor put_in_regs put_in_regs)
;; Get the `n`th register inside a `ValueRegs`.
(decl value_regs_get (ValueRegs usize) Reg)
(extern constructor value_regs_get value_regs_get)
;; Put the value into one or more registers and return the first register.
;;
;; Unlike `put_in_reg`, this does not assert that the value fits in a single
;; register. This is useful for things like a `i128` shift amount, where we mask
;; the shift amount to the bit width of the value being shifted, and so the high
;; half of the `i128` won't ever be used.
;;
;; As a side effect, this marks that value as used.
(decl lo_reg (Value) Reg)
(rule (lo_reg val)
(let ((regs ValueRegs (put_in_regs val)))
(value_regs_get regs 0)))
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl u8_as_u64 (u8) u64)
(extern constructor u8_as_u64 u8_as_u64)
(decl u16_as_u64 (u16) u64)
(extern constructor u16_as_u64 u16_as_u64)
(decl u32_as_u64 (u32) u64)
(extern constructor u32_as_u64 u32_as_u64)
;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(extern const $B1 Type)
(extern const $B8 Type)
(extern const $B16 Type)
(extern const $B32 Type)
(extern const $B64 Type)
(extern const $B128 Type)
(extern const $I8 Type)
(extern const $I16 Type)
(extern const $I32 Type)
(extern const $I64 Type)
(extern const $I128 Type)
(extern const $B8X16 Type)
(extern const $B16X8 Type)
(extern const $B32X4 Type)
(extern const $B64X2 Type)
(extern const $I8X16 Type)
(extern const $I16X8 Type)
(extern const $I32X4 Type)
(extern const $I64X2 Type)
(extern const $F32X4 Type)
(extern const $F64X2 Type)
;; Get the bit width of a given type.
(decl ty_bits (Type) u16)
(extern constructor ty_bits ty_bits)
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; An extractor that only matches types that can fit in 64 bits.
(decl fits_in_64 (Type) Type)
(extern extractor fits_in_64 fits_in_64)
;; Extractor to get a `ValueSlice` out of a `ValueList`.
(decl value_list_slice (ValueSlice) ValueList)
(extern extractor infallible value_list_slice value_list_slice)
;; Extractor to get the first element from a value list, along with its tail as
;; a `ValueSlice`.
(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList)
(extern extractor infallible unwrap_head_value_list_1 unwrap_head_value_list_1)
;; Extractor to get the first two elements from a value list, along with its
;; tail as a `ValueSlice`.
(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList)
(extern extractor infallible unwrap_head_value_list_2 unwrap_head_value_list_2)
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
(decl writable_reg_to_reg (WritableReg) Reg)
(extern constructor writable_reg_to_reg writable_reg_to_reg)
;; Extract a `u64` from an `Imm64`.
(decl u64_from_imm64 (u64) Imm64)
(extern extractor infallible u64_from_imm64 u64_from_imm64)
;; Extract the result values for the given instruction.
(decl inst_results (ValueSlice) Inst)
(extern extractor infallible inst_results inst_results)
;; Extract the first result value of the given instruction.
(decl first_result (Value) Inst)
(extern extractor first_result first_result)
;; Extract the `InstructionData` for an `Inst`.
(decl inst_data (InstructionData) Inst)
(extern extractor infallible inst_data inst_data)
;; Extract the type of a `Value`.
(decl value_type (Type) Value)
(extern extractor infallible value_type value_type)
;; Extract the type of the instruction's first result.
(decl result_type (Type) Inst)
(extractor (result_type ty)
(first_result (value_type ty)))
;; Extract the type of the instruction's first result and pass along the
;; instruction as well.
(decl has_type (Type Inst) Inst)
(extractor (has_type ty inst)
(and (result_type ty)
inst))
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
;; type. Will only match when there is more than one lane.
(decl multi_lane (u8 u16) Type)
(extern extractor multi_lane multi_lane)
;; Match the instruction that defines the given value, if any.
(decl def_inst (Inst) Value)
(extern extractor def_inst def_inst)

View File

@@ -62,7 +62,7 @@ use serde::{Deserialize, Serialize};
///
/// The index stored in an `EntityList` points to part 2, the list elements. The value 0 is
/// reserved for the empty list which isn't allocated in the vector.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct EntityList<T: EntityRef + ReservedValue> {
index: u32,
@@ -271,7 +271,7 @@ impl<T: EntityRef + ReservedValue> EntityList<T> {
}
/// Get the list as a slice.
pub fn as_slice<'a>(&'a self, pool: &'a ListPool<T>) -> &'a [T] {
pub fn as_slice<'a>(&self, pool: &'a ListPool<T>) -> &'a [T] {
let idx = self.index as usize;
match pool.len_of(self) {
None => &[],

View File

@@ -122,18 +122,14 @@ block0(v0: i128, v1: i128):
v2 = imul v0, v1
; nextln: movq %rsi, %rax
; nextln: movq %rcx, %r8
; nextln: movq %rdi, %rsi
; nextln: imulq %rdx, %rsi
; nextln: movq %rdi, %rcx
; nextln: imulq %r8, %rcx
; nextln: imulq %rcx, %rsi
; nextln: imulq %rdx, %rax
; nextln: addq %rax, %rcx
; nextln: addq %rax, %rsi
; nextln: movq %rdi, %rax
; nextln: mul %rdx
; nextln: addq %rdx, %rcx
; nextln: movq %rsi, %rax
; nextln: movq %rcx, %rdx
; nextln: addq %rdx, %rsi
; nextln: movq %rsi, %rdx
return v2
; nextln: movq %rbp, %rsp
@@ -700,34 +696,35 @@ block2(v6: i128):
v8 = iadd.i128 v6, v7
return v8
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: testb $$1, %dl
; nextln: jnz label1; j label2
; check: Block 0:
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: testb $$1, %dl
; nextln: jnz label1; j label2
; check: Block 1:
; check: movl $$0, %esi
; nextln: movl $$0, %edi
; nextln: movl $$1, %eax
; nextln: movl $$0, %ecx
; nextln: addq %rax, %rsi
; nextln: adcq %rcx, %rdi
; nextln: movq %rsi, %rax
; nextln: movq %rdi, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
; check: xorq %rdi, %rdi
; nextln: xorq %rsi, %rsi
; nextln: movl $$1, %ecx
; nextln: xorq %rax, %rax
; nextln: addq %rcx, %rdi
; nextln: adcq %rax, %rsi
; nextln: movq %rdi, %rax
; nextln: movq %rsi, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
; check: Block 2:
; check: movl $$0, %esi
; nextln: movl $$0, %edi
; nextln: movl $$2, %eax
; nextln: movl $$0, %ecx
; nextln: addq %rax, %rsi
; nextln: adcq %rcx, %rdi
; nextln: movq %rsi, %rax
; nextln: movq %rdi, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
; check: xorq %rdi, %rdi
; nextln: xorq %rsi, %rsi
; nextln: movl $$2, %ecx
; nextln: xorq %rax, %rax
; nextln: addq %rcx, %rdi
; nextln: adcq %rax, %rsi
; nextln: movq %rdi, %rax
; nextln: movq %rsi, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
}
@@ -744,34 +741,32 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$32, %rsp
; nextln: subq $$16, %rsp
; nextln: movq %r12, 0(%rsp)
; nextln: movq %r13, 8(%rsp)
; nextln: movq %r14, 16(%rsp)
; nextln: movq %r8, %r14
; nextln: movq 16(%rbp), %r10
; nextln: movq %r9, %r11
; nextln: movq 16(%rbp), %r13
; nextln: movq 24(%rbp), %r12
; nextln: movq 32(%rbp), %r11
; nextln: movq 40(%rbp), %rax
; nextln: movq 48(%rbp), %r13
; nextln: movq %rsi, %r8
; nextln: movq 32(%rbp), %r10
; nextln: movq 40(%rbp), %r9
; nextln: movq 48(%rbp), %rax
; nextln: addq %rdx, %rdi
; nextln: adcq %rcx, %r8
; nextln: movq %rsi, %rdx
; nextln: adcq %rcx, %rdx
; nextln: xorq %rsi, %rsi
; nextln: addq %r14, %r9
; nextln: adcq %rsi, %r10
; nextln: addq %rax, %r12
; nextln: adcq %r13, %r11
; nextln: addq %r9, %rdi
; nextln: adcq %r10, %r8
; nextln: addq %r8, %r11
; nextln: adcq %rsi, %r13
; nextln: addq %r9, %r12
; nextln: adcq %rax, %r10
; nextln: addq %r11, %rdi
; nextln: adcq %r13, %rdx
; nextln: addq %rdi, %r12
; nextln: adcq %r8, %r11
; nextln: adcq %rdx, %r10
; nextln: movq %r12, %rax
; nextln: movq %r11, %rdx
; nextln: movq %r10, %rdx
; nextln: movq 0(%rsp), %r12
; nextln: movq 8(%rsp), %r13
; nextln: movq 16(%rsp), %r14
; nextln: addq $$32, %rsp
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
@@ -907,26 +902,25 @@ block0(v0: i128, v1: i128):
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rsi, %rax
; nextln: movq %rdi, %rsi
; nextln: movq %rdi, %rax
; nextln: movq %rsi, %rdi
; nextln: movq %rax, %rsi
; nextln: movq %rdx, %rcx
; nextln: shlq %cl, %rsi
; nextln: movq %rdx, %rcx
; nextln: shlq %cl, %rax
; nextln: shlq %cl, %rdi
; nextln: movl $$64, %ecx
; nextln: subq %rdx, %rcx
; nextln: shrq %cl, %rdi
; nextln: shrq %cl, %rax
; nextln: xorq %rcx, %rcx
; nextln: testq $$127, %rdx
; nextln: cmovzq %rcx, %rdi
; nextln: orq %rax, %rdi
; nextln: xorq %rax, %rax
; nextln: andq $$64, %rdx
; nextln: cmovzq %rdi, %rax
; nextln: cmovzq %rcx, %rax
; nextln: orq %rdi, %rax
; nextln: testq $$64, %rdx
; nextln: cmovzq %rsi, %rcx
; nextln: cmovnzq %rsi, %rax
; nextln: movq %rax, %rdx
; nextln: cmovzq %rax, %rsi
; nextln: movq %rcx, %rax
; nextln: movq %rsi, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
@@ -939,28 +933,26 @@ block0(v0: i128, v1: i128):
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rdi, %rax
; nextln: movq %rsi, %rdi
; nextln: movq %rdi, %rsi
; nextln: movq %rsi, %rax
; nextln: movq %rdx, %rcx
; nextln: shrq %cl, %rdi
; nextln: movq %rax, %rsi
; nextln: movq %rdx, %rcx
; nextln: shrq %cl, %rsi
; nextln: movq %rdx, %rcx
; nextln: shrq %cl, %rax
; nextln: movl $$64, %ecx
; nextln: subq %rdx, %rcx
; nextln: shlq %cl, %rdi
; nextln: shlq %cl, %rax
; nextln: xorq %rcx, %rcx
; nextln: testq $$127, %rdx
; nextln: cmovzq %rcx, %rdi
; nextln: orq %rax, %rdi
; nextln: xorq %rax, %rax
; nextln: cmovzq %rcx, %rax
; nextln: orq %rdi, %rax
; nextln: xorq %rcx, %rcx
; nextln: andq $$64, %rdx
; nextln: cmovzq %rsi, %rax
; nextln: cmovzq %rdi, %rcx
; nextln: cmovnzq %rsi, %rcx
; nextln: movq %rax, %rdx
; nextln: movq %rcx, %rax
; nextln: testq $$64, %rdx
; nextln: movq %rsi, %rdi
; nextln: cmovzq %rax, %rdi
; nextln: cmovzq %rsi, %rcx
; nextln: movq %rdi, %rax
; nextln: movq %rcx, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
@@ -1006,53 +998,51 @@ block0(v0: i128, v1: i128):
return v2
}
; check: pushq %rbp
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rdi, %r8
; nextln: movq %r8, %r9
; nextln: movq %rdx, %rcx
; nextln: shlq %cl, %r9
; nextln: movq %rsi, %rax
; nextln: movq %rdi, %rax
; nextln: movq %rdx, %rcx
; nextln: shlq %cl, %rax
; nextln: movq %rsi, %r8
; nextln: movq %rdx, %rcx
; nextln: shlq %cl, %r8
; nextln: movl $$64, %ecx
; nextln: subq %rdx, %rcx
; nextln: movq %r8, %r10
; nextln: shrq %cl, %r10
; nextln: xorq %rdi, %rdi
; nextln: movq %rdi, %r9
; nextln: shrq %cl, %r9
; nextln: xorq %rcx, %rcx
; nextln: testq $$127, %rdx
; nextln: cmovzq %rdi, %r10
; nextln: orq %rax, %r10
; nextln: xorq %rax, %rax
; nextln: movq %rdx, %rcx
; nextln: andq $$64, %rcx
; nextln: cmovzq %r10, %rax
; nextln: cmovzq %r9, %rdi
; nextln: cmovnzq %r9, %rax
; nextln: cmovzq %rcx, %r9
; nextln: orq %r8, %r9
; nextln: testq $$64, %rdx
; nextln: movq %rcx, %r8
; nextln: cmovzq %rax, %r8
; nextln: cmovzq %r9, %rax
; nextln: movl $$128, %r9d
; nextln: subq %rdx, %r9
; nextln: movq %rsi, %rdx
; nextln: movq %rdi, %rdx
; nextln: movq %r9, %rcx
; nextln: shrq %cl, %rdx
; nextln: movq %rsi, %rdi
; nextln: movq %r9, %rcx
; nextln: shrq %cl, %r8
; nextln: shrq %cl, %rdi
; nextln: movl $$64, %ecx
; nextln: subq %r9, %rcx
; nextln: shlq %cl, %rsi
; nextln: xorq %rcx, %rcx
; nextln: testq $$127, %r9
; nextln: cmovzq %rcx, %rsi
; nextln: orq %r8, %rsi
; nextln: xorq %rcx, %rcx
; nextln: xorq %r8, %r8
; nextln: andq $$64, %r9
; nextln: cmovzq %rdx, %rcx
; nextln: cmovzq %rsi, %r8
; nextln: cmovnzq %rdx, %r8
; nextln: orq %rdi, %r8
; nextln: orq %rax, %rcx
; nextln: orq %rdx, %rsi
; nextln: xorq %rdx, %rdx
; nextln: testq $$64, %r9
; nextln: movq %rdi, %rcx
; nextln: cmovzq %rsi, %rcx
; nextln: movq %rdx, %rsi
; nextln: cmovzq %rdi, %rsi
; nextln: orq %rcx, %r8
; nextln: orq %rsi, %rax
; nextln: movq %rax, %rdx
; nextln: movq %r8, %rax
; nextln: movq %rcx, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

3
cranelift/isle/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/target
*~
.*.swp

530
cranelift/isle/README.md Normal file
View File

@@ -0,0 +1,530 @@
# ISLE: Instruction Selection/Lowering Expressions DSL
## Table of Contents
* [Introduction](#introduction)
* [Example Usage](#example-usage)
* [Tutorial](#tutorial)
* [Implementation](#implementation)
* [Sketch of Instruction Selector](#sketch-of-instruction-selector)
## Introduction
ISLE is a DSL that allows one to write instruction-lowering rules for a
compiler backend. It is based on a "term-rewriting" paradigm in which the input
-- some sort of compiler IR -- is, conceptually, a tree of terms, and we have a
set of rewrite rules that turn this into another tree of terms.
This repository contains a prototype meta-compiler that compiles ISLE rules
down to an instruction selector implementation in generated Rust code. The
generated code operates efficiently in a single pass over the input, and merges
all rules into a decision tree, sharing work where possible, while respecting
user-configurable priorities on each rule.
The ISLE language is designed so that the rules can both be compiled into an
efficient compiler backend and can be used in formal reasoning about the
compiler. The compiler in this repository implements the former. The latter
use-case is future work and outside the scope of this prototype, but at a high
level, the rules can be seen as simple equivalences between values in two
languages, and so should be translatable to formal constraints or other logical
specification languages.
Some more details and motivation are in [BA RFC
#15](https://github.com/bytecodealliance/rfcs/pull/15); additional
documentation will eventually be added to carefully specify the language
semantics.
## Example Usage
Build `islec`, the ISLE compiler:
```shell
$ cargo build --release
```
Compile a `.isle` source file into Rust code:
```shell
$ target/release/islec -i isle_examples/test.isle -o isle_examples/test.rs
```
Include that Rust code in your crate and compile it:
```shell
$ rustc isle_examples/test_main.rs
```
## Tutorial
This tutorial walks through defining an instruction selection and lowering pass
for a simple, RISC-y, high-level IR down to low-level, CISC-y machine
instructions. It is intentionally somewhat similar to CLIF to MachInst lowering,
although it restricts the input and output languages to only adds, loads, and
constants so that we can focus on ISLE itself.
> The full ISLE source code for this tutorial is available at
> `isle_examples/tutorial.isle`.
The ISLE language is based around rules for translating a term (i.e. expression)
into another term. Terms are typed, so before we can write rules for translating
some type of term into another type of term, we have to define those types:
```lisp
;; Declare that we are using the `i32` primitive type from Rust.
(type i32 (primitive i32))
;; Our high-level, RISC-y input IR.
(type HighLevelInst
(enum (Add (a Value) (b Value))
(Load (addr Value))
(Const (c i32))))
;; A value in our high-level IR is a Rust `Copy` type. Values are either defined
;; by an instruction, or are a basic block argument.
(type Value (primitive Value))
;; Our low-level, CISC-y machine instructions.
(type LowLevelInst
(enum (Add (mode AddrMode))
(Load (offset i32) (addr Reg))
(Const (c i32))))
;; Different kinds of addressing modes for operands to our low-level machine
;; instructions.
(type AddrMode
(enum
;; Both operands in registers.
(RegReg (a Reg) (b Reg))
;; The destination/first operand is a register; the second operand is in
;; memory at `[b + offset]`.
(RegMem (a Reg) (b Reg) (offset i32))
;; The destination/first operand is a register, second operand is an
;; immediate.
(RegImm (a Reg) (imm i32))))
;; The register type is a Rust `Copy` type.
(type Reg (primitive Reg))
```
Now we can start writing some basic lowering rules! We declare the top-level
lowering function (a "constructor term" in ISLE terminology) and attach rules to
it. The simplest case is matching a high-level `Const` instruction and lowering
that to a low-level `Const` instruction, since there isn't any translation we
really have to do.
```lisp
;; Declare our top-level lowering function. We will attach rules to this
;; declaration for lowering various patterns of `HighLevelInst` inputs.
(decl lower (HighLevelInst) LowLevelInst)
;; Simple rule for lowering constants.
(rule (lower (HighLevelInst.Const c))
(LowLevelInst.Const c))
```
Each rule has the form `(rule <left-hand side> <right-hand-side>)`. The
left-hand side (LHS) is a *pattern* and the right-hand side (RHS) is an
*expression*. When the LHS pattern matches the input, then we evaluate the RHS
expression. The LHS pattern can bind variables from the input that are then
available in the right-hand side. For example, in our `Const`-lowering rule, the
variable `c` is bound from the LHS and then reused in the RHS.
Now we can compile this code by running
```shell
$ islec isle_examples/tutorial.isle
```
and we'll get the following output <sup>(ignoring any minor code generation
changes in the future)</sup>:
```rust
// GENERATED BY ISLE. DO NOT EDIT!
//
// Generated automatically from the instruction-selection DSL code in:
// - isle_examples/tutorial.isle
// [Type and `Context` definitions removed for brevity...]
// Generated as internal constructor for term lower.
pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: &HighLevelInst) -> Option<LowLevelInst> {
let pattern0_0 = arg0;
if let &HighLevelInst::Const { c: pattern1_0 } = pattern0_0 {
// Rule at isle_examples/tutorial.isle line 45.
let expr0_0 = LowLevelInst::Const {
c: pattern1_0,
};
return Some(expr0_0);
}
return None;
}
```
There are a few things to notice about this generated Rust code:
* The `lower` constructor term becomes the `constructor_lower` function in the
generated code.
* The function returns a value of type `Option<LowLevelInst>` and returns `None`
when it doesn't know how to lower an input `HighLevelInst`. This is useful for
incrementally porting hand-written lowering code to ISLE.
* There is a helpful comment documenting where in the ISLE source code a rule
was defined. The goal is to make ISLE more transparent and less magical.
* The code is parameterized by a type that implements a `Context`
trait. Implementing this trait is how you glue the generated code into your
compiler. Right now this is an empty trait; more on `Context` later.
* Lastly, and most importantly, this generated Rust code is basically what we
would have written by hand to do the same thing, other than things like
variable names. It checks if the input is a `Const`, and if so, translates it
into a `LowLevelInst::Const`.
Okay, one rule isn't very impressive, but in order to start writing more rules
we need to be able to put the result of a lowered instruction into a `Reg`. This
might internally have to do arbitrary things like update use counts or anything
else that Cranelift's existing `LowerCtx::put_input_in_reg` does for different
target architectures. To allow for plugging in this kind of arbitrary logic,
ISLE supports *external constructors*. These end up as methods of the `Context`
trait in the generated Rust code, and you can implement them however you want
with custom Rust code.
Here is how we declare an external helper to put a value into a register:
```lisp
;; Declare an external constructor that puts a high-level `Value` into a
;; low-level `Reg`.
(decl put_in_reg (Value) Reg)
(extern constructor put_in_reg put_in_reg)
```
If we rerun `islec` on our ISLE source, instead of an empty `Context` trait, now
we will get this trait definition:
```rust
pub trait Context {
fn put_in_reg(&mut self, arg0: Value) -> (Reg,);
}
```
With the `put_in_reg` helper available, we can define rules for lowering loads
and adds:
```lisp
;; Simple rule for lowering adds.
(rule (lower (HighLevelInst.Add a b))
(LowLevelInst.Add
(AddrMode.RegReg (put_in_reg a) (put_in_reg b))))
;; Simple rule for lowering loads.
(rule (lower (HighLevelInst.Load addr))
(LowLevelInst.Load 0 (put_in_reg addr)))
```
If we compile our ISLE source into Rust code once again, the generated code for
`lower` now looks like this:
```rust
// Generated as internal constructor for term lower.
pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: &HighLevelInst) -> Option<LowLevelInst> {
let pattern0_0 = arg0;
match pattern0_0 {
&HighLevelInst::Const { c: pattern1_0 } => {
// Rule at isle_examples/tutorial.isle line 45.
let expr0_0 = LowLevelInst::Const {
c: pattern1_0,
};
return Some(expr0_0);
}
&HighLevelInst::Load { addr: pattern1_0 } => {
// Rule at isle_examples/tutorial.isle line 59.
let expr0_0: i32 = 0;
let expr1_0 = C::put_in_reg(ctx, pattern1_0);
let expr2_0 = LowLevelInst::Load {
offset: expr0_0,
addr: expr1_0,
};
return Some(expr2_0);
}
&HighLevelInst::Add { a: pattern1_0, b: pattern1_1 } => {
// Rule at isle_examples/tutorial.isle line 54.
let expr0_0 = C::put_in_reg(ctx, pattern1_0);
let expr1_0 = C::put_in_reg(ctx, pattern1_1);
let expr2_0 = AddrMode::RegReg {
a: expr0_0,
b: expr1_0,
};
let expr3_0 = LowLevelInst::Add {
mode: expr2_0,
};
return Some(expr3_0);
}
_ => {}
}
return None;
}
```
As you can see, each of our rules was collapsed into a single, efficient `match`
expression. Just like we would have otherwise written by hand. And wherever we
need to get a high-level operand as a low-level register, there is a call to the
`Context::put_in_reg` trait method, allowing us to hook whatever arbitrary logic
we need to when putting a value into a register when we implement the `Context`
trait.
Things start to get more interesting when we want to do things like sink a load
into the add's addressing mode. This is only desirable when our add is the only
use of the loaded value. Furthermore, it is only valid to do when there isn't
any store that might write to the same address we are loading from in between
the load and the add. Otherwise, moving the load across the store could result
in a miscompilation where we load the wrong value to add:
```text
x = load addr
store 42 -> addr
y = add x, 1
==/==>
store 42 -> addr
x = load addr
y = add x, 1
```
We can encode these kinds of preconditions in an *external extractor*. An
extractor is like our regular constructor functions, but it is used inside LHS
patterns, rather than RHS expressions, and its arguments and results are
flipped around: instead of taking arguments and producing results, it takes a
result and
(fallibly) produces the arguments. This allows us to write custom preconditions
for matching code.
Let's make this more clear with a concrete example. Here is the declaration of
an external extractor to match on the high-level instruction that defined a
given operand `Value`, along with a new rule to sink loads into adds:
```lisp
;; Declare an external extractor for extracting the instruction that defined a
;; given operand value.
(decl inst_result (HighLevelInst) Value)
(extern extractor inst_result inst_result)
;; Rule to sink loads into adds.
(rule (lower (HighLevelInst.Add a (inst_result (HighLevelInst.Load addr))))
(LowLevelInst.Add
(AddrMode.RegMem (put_in_reg a)
(put_in_reg addr)
0)))
```
Note that the operand `Value` passed into this extractor might be a basic block
parameter, in which case there is no such instruction. Or there might be a store
or function call instruction in between the current instruction and the
instruction that defines the given operand value, in which case we want to
"hide" the instruction so that we don't illegally sink loads into adds they
shouldn't be sunk into. So this extractor might fail to return an instruction
for a given operand `Value`.
If we recompile our ISLE source into Rust code once again, we see a new
`inst_result` method defined on our `Context` trait, we notice that its
arguments and returns are flipped around from the `decl` in the ISLE source
because it is an extractor, and finally that it returns an `Option` because it
isn't guaranteed that we can extract a defining instruction for the given
operand `Value`:
```rust
pub trait Context {
fn put_in_reg(&mut self, arg0: Value) -> (Reg,);
fn inst_result(&mut self, arg0: Value) -> Option<(HighLevelInst,)>;
}
```
And if we look at the generated code for our `lower` function, there is a new,
nested case for sinking loads into adds that uses the `Context::inst_result`
trait method to see if our new rule can be applied:
```rust
// Generated as internal constructor for term lower.
pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: &HighLevelInst) -> Option<LowLevelInst> {
let pattern0_0 = arg0;
match pattern0_0 {
&HighLevelInst::Const { c: pattern1_0 } => {
// [...]
}
&HighLevelInst::Load { addr: pattern1_0 } => {
// [...]
}
&HighLevelInst::Add { a: pattern1_0, b: pattern1_1 } => {
if let Some((pattern2_0,)) = C::inst_result(ctx, pattern1_1) {
if let &HighLevelInst::Load { addr: pattern3_0 } = &pattern2_0 {
// Rule at isle_examples/tutorial.isle line 68.
let expr0_0 = C::put_in_reg(ctx, pattern1_0);
let expr1_0 = C::put_in_reg(ctx, pattern3_0);
let expr2_0: i32 = 0;
let expr3_0 = AddrMode::RegMem {
a: expr0_0,
b: expr1_0,
offset: expr2_0,
};
let expr4_0 = LowLevelInst::Add {
mode: expr3_0,
};
return Some(expr4_0);
}
}
// Rule at isle_examples/tutorial.isle line 54.
let expr0_0 = C::put_in_reg(ctx, pattern1_0);
let expr1_0 = C::put_in_reg(ctx, pattern1_1);
let expr2_0 = AddrMode::RegReg {
a: expr0_0,
b: expr1_0,
};
let expr3_0 = LowLevelInst::Add {
mode: expr2_0,
};
return Some(expr3_0);
}
_ => {}
}
return None;
}
```
Once again, this is pretty much the code you would have otherwise written by
hand to sink the load into the add.
At this point we can start defining a whole bunch of even-more-complicated
lowering rules that do things like take advantage of folding static offsets into
loads into adds:
```lisp
;; Rule to sink a load of a base address with a static offset into a single add.
(rule (lower (HighLevelInst.Add
a
(inst_result (HighLevelInst.Load
(inst_result (HighLevelInst.Add
base
(inst_result (HighLevelInst.Const offset))))))))
(LowLevelInst.Add
(AddrMode.RegMem (put_in_reg a)
(put_in_reg base)
offset)))
;; Rule for sinking an immediate into an add.
(rule (lower (HighLevelInst.Add a (inst_result (HighLevelInst.Const c))))
(LowLevelInst.Add
(AddrMode.RegImm (put_in_reg a) c)))
;; Rule for lowering loads of a base address with a static offset.
(rule (lower (HighLevelInst.Load
(inst_result (HighLevelInst.Add
base
(inst_result (HighLevelInst.Const offset))))))
(LowLevelInst.Load offset (put_in_reg base)))
```
I'm not going to show the generated Rust code for these new rules here because
it is starting to get a bit too big. But you can compile
`isle_examples/tutorial.isle` and verify yourself that it generates the code you
expect it to.
In conclusion, adding new lowering rules is easy with ISLE. And you still get
that efficient, compact tree of `match` expressions in the generated Rust code
that you would otherwise write by hand.
## Implementation
This is an overview of `islec`'s passes and data structures:
```text
+------------------+
| ISLE Source Text |
+------------------+
|
| Lex
V
+--------+
| Tokens |
+--------+
|
| Parse
V
+----------------------+
| Abstract Syntax Tree |
+----------------------+
|
| Semantic Analysis
V
+----------------------------+
| Term and Type Environments |
+----------------------------+
|
| Trie Construction
V
+-----------+
| Term Trie |
+-----------+
|
| Code Generation
V
+------------------+
| Rust Source Code |
+------------------+
```
### Lexing
Lexing breaks up the input ISLE source text into a stream of tokens. Our lexer
is pull-based, meaning that we don't eagerly construct the full stream of
tokens. Instead, we wait until the next token is requested, at which point we
lazily lex it.
Relevant source files:
* `isle/src/lexer.rs`
### Parsing
Parsing translates the stream of tokens into an abstract syntax tree (AST). Our
parser is a simple, hand-written, recursive-descent parser.
Relevant source files:
* `isle/src/ast.rs`
* `isle/src/parser.rs`
### Semantic Analysis
Semantic analysis performs type checking, figures out which rules apply to which
terms, etc. It creates a type environment and a term environment that we can use
to get information about our terms throughout the rest of the pipeline.
Relevant source files:
* `isle/src/sema.rs`
### Trie Construction
The trie construction phase linearizes each rule's LHS pattern and inserts them
into a trie that maps LHS patterns to RHS expressions. This trie is the skeleton
of the decision tree that will be emitted during code generation.
Relevant source files:
* `isle/src/ir.rs`
* `isle/src/trie.rs`
### Code Generation
Code generation takes in the term trie and emits Rust source code that
implements it.
Relevant source files:
* `isle/src/codegen.rs`
## Sketch of Instruction Selector
Please see [this Cranelift
branch](https://github.com/cfallin/wasmtime/tree/isle) for an ongoing sketch of
an instruction selector backend in Cranelift that uses ISLE.

22
cranelift/isle/TODO Normal file
View File

@@ -0,0 +1,22 @@
- Document the semantics of the DSL!
- Clean up and factor the codegen properly.
- Get rid of the expression syntax `<EXPR` in patterns; do a type-dependent
parse instead where we know the polarity of pattern-term args and parse
in-args as exprs.
- Look into whether optimizations are possible:
- More in-depth fallibility analysis (avoid failure edges where possible)
- Slightly nicer human-readable generated code
- Include full rule body (S-expression) in comment, not just line number
- Inline some expressions (no more `let val23 = 1234; ... f(val23);`)
- Build inlining and simplification: inline invocations of internal
constructors, and eliminate ctor-etor or makevariant-matchvariant pairs.
- Ideas from discussion with fitzgen
- Turn arg-polarity and exprs on extractors into purer "InstFormat"
- Emit two contexts: an immutable context for inputs and a mutable context for
outputs

3
cranelift/isle/fuzz/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
target
corpus
artifacts

View File

@@ -0,0 +1,21 @@
[package]
name = "isle-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
env_logger = { version = "0.9.0", default-features = false }
isle = { path = "../isle" }
libfuzzer-sys = "0.4"
log = "0.4.14"
[[bin]]
name = "compile"
path = "fuzz_targets/compile.rs"
test = false
doc = false

View File

@@ -0,0 +1,4 @@
# ISLE Fuzz Targets
These are separate from the top-level `wasmtime/fuzz` fuzz targets because we
don't intend to run them on OSS-Fuzz. They are just for local ISLE hacking.

View File

@@ -0,0 +1,32 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// Fuzz the ISLE front half: lex, parse, and compile arbitrary source text.
// Each stage may legitimately reject the input, in which case we just bail
// out; only panics/crashes are interesting to the fuzzer.
fuzz_target!(|s: &str| {
    let _ = env_logger::try_init();

    let lexed = isle::lexer::Lexer::from_str(s, "fuzz-input.isle");
    log::debug!("lexer = {:?}", lexed);
    let lexed = if let Ok(l) = lexed { l } else { return };

    let parsed = isle::parser::parse(lexed);
    log::debug!("defs = {:?}", parsed);
    let parsed = if let Ok(d) = parsed { d } else { return };

    let compiled = isle::compile::compile(&parsed);
    log::debug!("code = {:?}", compiled);
    let compiled = if let Ok(c) = compiled { c } else { return };

    // TODO: check that the generated code is valid Rust. This will require
    // stubbing out extern types, extractors, and constructors.
    drop(compiled);
});

View File

@@ -0,0 +1,14 @@
[package]
authors = ["The Cranelift Project Developers"]
description = "ISLE: Instruction Selection and Lowering Expressions. A domain-specific language for instruction selection in Cranelift."
edition = "2018"
license = "Apache-2.0 WITH LLVM-exception"
name = "isle"
readme = "../README.md"
repository = "https://github.com/bytecodealliance/wasmtime/tree/main/cranelift/isle"
version = "0.78.0"
[dependencies]
log = "0.4"
miette = "3.0.0"
thiserror = "1.0.29"

View File

@@ -0,0 +1,9 @@
# ISLE: Instruction Selection / Lowering Expressions
ISLE is a domain specific language (DSL) for instruction selection and lowering
clif instructions to vcode's `MachInst`s in Cranelift.
ISLE is a statically-typed term-rewriting language. You define rewriting rules
that map input terms (clif instructions) into output terms (`MachInst`s). These
rules get compiled down into Rust source text that uses a tree of `match`
expressions that is as good as or better than what you would have written by hand.

View File

@@ -0,0 +1,420 @@
//! Abstract syntax tree (AST) created from parsed ISLE.
#![allow(missing_docs)]
use crate::lexer::Pos;
use std::sync::Arc;
/// The parsed form of an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Defs {
    /// All toplevel definitions, in the order they appeared in the source.
    pub defs: Vec<Def>,
    /// The names of the parsed source files.
    pub filenames: Vec<Arc<str>>,
    /// The full text of the parsed source files.
    pub file_texts: Vec<Arc<str>>,
}
/// One toplevel form in an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Def {
    /// A type definition.
    Type(Type),
    /// A rewrite rule.
    Rule(Rule),
    /// An extractor macro definition.
    Extractor(Extractor),
    /// A term declaration, with argument and return types.
    Decl(Decl),
    /// A binding of a term to an external (Rust-defined) implementation.
    Extern(Extern),
}
/// An identifier -- a variable, term symbol, or type.
///
/// Pairs the identifier's text with the source position where it appeared.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Ident(pub String, pub Pos);
/// A declaration of a type.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Type {
    /// The type's name.
    pub name: Ident,
    /// Whether this type is declared as externally defined (i.e. `extern`).
    pub is_extern: bool,
    /// The type's definition: a primitive or an enum with variants.
    pub ty: TypeValue,
    /// The source position of this declaration.
    pub pos: Pos,
}
/// The actual type-value: a primitive or an enum with variants.
///
/// TODO: add structs as well?
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum TypeValue {
    /// A primitive type, named by the identifier.
    Primitive(Ident, Pos),
    /// An enum type with the given variants.
    Enum(Vec<Variant>, Pos),
}
/// One variant of an enum type.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Variant {
    /// The variant's name.
    pub name: Ident,
    /// The variant's fields, possibly empty.
    pub fields: Vec<Field>,
    /// The source position of this variant.
    pub pos: Pos,
}
/// One field of an enum variant.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Field {
    /// The field's name.
    pub name: Ident,
    /// The name of the field's type.
    pub ty: Ident,
    /// The source position of this field.
    pub pos: Pos,
}
/// A declaration of a term with its argument and return types.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Decl {
    /// The name of the declared term.
    pub term: Ident,
    /// The names of the term's argument types, in order.
    pub arg_tys: Vec<Ident>,
    /// The name of the term's return type.
    pub ret_ty: Ident,
    /// The source position of this declaration.
    pub pos: Pos,
}
/// A rewrite rule: a left-hand-side pattern, a right-hand-side expression to
/// evaluate when the pattern matches, and an optional priority.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Rule {
    /// The left-hand side: the pattern to match against the input.
    pub pattern: Pattern,
    /// The right-hand side: the expression evaluated on a match.
    pub expr: Expr,
    /// The source position of this rule.
    pub pos: Pos,
    /// The rule's priority, if one was given.
    pub prio: Option<i64>,
}
/// An extractor macro: (A x y) becomes (B x _ y ...). Expanded during
/// ast-to-sema pass.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Extractor {
    /// The term this extractor macro defines.
    pub term: Ident,
    /// The names of the macro's arguments.
    pub args: Vec<Ident>,
    /// The pattern template the macro expands into; occurrences of `args` in
    /// the template become `Pattern::MacroArg` placeholders.
    pub template: Pattern,
    /// The source position of this definition.
    pub pos: Pos,
}
/// A pattern: the left-hand side of a rule.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Pattern {
    /// An operator that binds a variable to a subterm and matches the
    /// subpattern.
    BindPattern {
        var: Ident,
        subpat: Box<Pattern>,
        pos: Pos,
    },
    /// A variable that has already been bound (`=x` syntax).
    Var { var: Ident, pos: Pos },
    /// An operator that matches a constant integer value.
    ConstInt { val: i64, pos: Pos },
    /// An operator that matches an external constant value.
    ConstPrim { val: Ident, pos: Pos },
    /// An application of a type variant or term.
    Term {
        sym: Ident,
        args: Vec<TermArgPattern>,
        pos: Pos,
    },
    /// An operator that matches anything.
    Wildcard { pos: Pos },
    /// N sub-patterns that must all match.
    And { subpats: Vec<Pattern>, pos: Pos },
    /// Internal use only: macro argument in a template.
    MacroArg { index: usize, pos: Pos },
}
impl Pattern {
    /// Get the root term of this pattern, if any: the term applied at the
    /// top level, looking through any variable bindings wrapped around it.
    pub fn root_term(&self) -> Option<&Ident> {
        match self {
            &Pattern::BindPattern { ref subpat, .. } => subpat.root_term(),
            &Pattern::Term { ref sym, .. } => Some(sym),
            _ => None,
        }
    }

    /// Call `f` for each of the terms in this pattern.
    pub fn terms(&self, f: &mut dyn FnMut(Pos, &Ident)) {
        match self {
            Pattern::Term { sym, args, pos } => {
                f(*pos, sym);
                for arg in args {
                    // Only pattern arguments can contain nested terms;
                    // expression arguments are skipped here.
                    if let TermArgPattern::Pattern(p) = arg {
                        p.terms(f);
                    }
                }
            }
            Pattern::And { subpats, .. } => {
                for p in subpats {
                    p.terms(f);
                }
            }
            Pattern::BindPattern { subpat, .. } => {
                subpat.terms(f);
            }
            Pattern::Var { .. }
            | Pattern::ConstInt { .. }
            | Pattern::ConstPrim { .. }
            | Pattern::Wildcard { .. }
            | Pattern::MacroArg { .. } => {}
        }
    }

    /// Turn this pattern into an extractor-macro template: any binding of a
    /// wildcard (`x` / `(x _)`) whose variable name matches one of
    /// `macro_args` is replaced by a `Pattern::MacroArg` placeholder carrying
    /// that argument's index. Other pattern forms are rebuilt recursively.
    pub fn make_macro_template(&self, macro_args: &[Ident]) -> Pattern {
        log::trace!("make_macro_template: {:?} with {:?}", self, macro_args);
        match self {
            // A variable binding over a wildcard: this is exactly the shape a
            // macro-argument reference takes, so substitute a placeholder if
            // the name matches a declared macro argument.
            &Pattern::BindPattern {
                ref var,
                ref subpat,
                pos,
                ..
            } if matches!(&**subpat, &Pattern::Wildcard { .. }) => {
                if let Some(i) = macro_args.iter().position(|arg| arg.0 == var.0) {
                    Pattern::MacroArg { index: i, pos }
                } else {
                    self.clone()
                }
            }
            &Pattern::BindPattern {
                ref var,
                ref subpat,
                pos,
            } => Pattern::BindPattern {
                var: var.clone(),
                subpat: Box::new(subpat.make_macro_template(macro_args)),
                pos,
            },
            &Pattern::And { ref subpats, pos } => {
                let subpats = subpats
                    .iter()
                    .map(|subpat| subpat.make_macro_template(macro_args))
                    .collect::<Vec<_>>();
                Pattern::And { subpats, pos }
            }
            &Pattern::Term {
                ref sym,
                ref args,
                pos,
            } => {
                let args = args
                    .iter()
                    .map(|arg| arg.make_macro_template(macro_args))
                    .collect::<Vec<_>>();
                Pattern::Term {
                    sym: sym.clone(),
                    args,
                    pos,
                }
            }
            // Leaf patterns contain no macro-argument references.
            &Pattern::Var { .. }
            | &Pattern::Wildcard { .. }
            | &Pattern::ConstInt { .. }
            | &Pattern::ConstPrim { .. } => self.clone(),
            // Placeholders only appear in already-built templates, never in
            // the source pattern we are templatizing.
            &Pattern::MacroArg { .. } => unreachable!(),
        }
    }

    /// Instantiate a macro template: replace each `Pattern::MacroArg` with
    /// the corresponding pattern from `macro_args`. Returns `None` if a
    /// placeholder's index is out of range for `macro_args`.
    pub fn subst_macro_args(&self, macro_args: &[Pattern]) -> Option<Pattern> {
        log::trace!("subst_macro_args: {:?} with {:?}", self, macro_args);
        match self {
            &Pattern::BindPattern {
                ref var,
                ref subpat,
                pos,
            } => Some(Pattern::BindPattern {
                var: var.clone(),
                subpat: Box::new(subpat.subst_macro_args(macro_args)?),
                pos,
            }),
            &Pattern::And { ref subpats, pos } => {
                let subpats = subpats
                    .iter()
                    .map(|subpat| subpat.subst_macro_args(macro_args))
                    .collect::<Option<Vec<_>>>()?;
                Some(Pattern::And { subpats, pos })
            }
            &Pattern::Term {
                ref sym,
                ref args,
                pos,
            } => {
                let args = args
                    .iter()
                    .map(|arg| arg.subst_macro_args(macro_args))
                    .collect::<Option<Vec<_>>>()?;
                Some(Pattern::Term {
                    sym: sym.clone(),
                    args,
                    pos,
                })
            }
            // Leaf patterns are substitution-free.
            &Pattern::Var { .. }
            | &Pattern::Wildcard { .. }
            | &Pattern::ConstInt { .. }
            | &Pattern::ConstPrim { .. } => Some(self.clone()),
            &Pattern::MacroArg { index, .. } => macro_args.get(index).cloned(),
        }
    }

    /// Get the source position of this pattern.
    pub fn pos(&self) -> Pos {
        match self {
            &Pattern::ConstInt { pos, .. }
            | &Pattern::ConstPrim { pos, .. }
            | &Pattern::And { pos, .. }
            | &Pattern::Term { pos, .. }
            | &Pattern::BindPattern { pos, .. }
            | &Pattern::Var { pos, .. }
            | &Pattern::Wildcard { pos, .. }
            | &Pattern::MacroArg { pos, .. } => pos,
        }
    }
}
/// A pattern in a term argument. Adds "evaluated expression" to kinds
/// of patterns in addition to all options in `Pattern`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum TermArgPattern {
    /// A regular pattern that must match the existing value in the term's argument.
    Pattern(Pattern),
    /// An expression that is evaluated during the match phase and can
    /// be given into an extractor. This is essentially a limited form
    /// of unification or bidirectional argument flow (a la Prolog):
    /// we can pass an arg *into* an extractor rather than getting the
    /// arg *out of* it.
    Expr(Expr),
}
impl TermArgPattern {
    /// Rewrite this argument pattern into a macro template: variables
    /// named in `args` inside the wrapped pattern become `MacroArg`
    /// placeholders; expressions are left untouched.
    fn make_macro_template(&self, args: &[Ident]) -> TermArgPattern {
        // Fix: this trace message previously read "repplace_macro_args",
        // a typo that matched no function name; log the actual name.
        log::trace!("make_macro_template: {:?} with {:?}", self, args);
        match self {
            &TermArgPattern::Pattern(ref pat) => {
                TermArgPattern::Pattern(pat.make_macro_template(args))
            }
            &TermArgPattern::Expr(_) => self.clone(),
        }
    }

    /// Substitute `args` for `MacroArg` placeholders in the wrapped
    /// pattern; returns `None` if a placeholder index is out of range.
    fn subst_macro_args(&self, args: &[Pattern]) -> Option<TermArgPattern> {
        match self {
            &TermArgPattern::Pattern(ref pat) => {
                Some(TermArgPattern::Pattern(pat.subst_macro_args(args)?))
            }
            &TermArgPattern::Expr(_) => Some(self.clone()),
        }
    }
}
/// An expression: the right-hand side of a rule.
///
/// Note that this *almost* looks like a core Lisp or lambda calculus,
/// except that there is no abstraction (lambda). This first-order
/// limit is what makes it analyzable.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Expr {
    /// A term: `(sym args...)`.
    Term {
        /// The term being invoked.
        sym: Ident,
        /// The argument expressions, one per term parameter.
        args: Vec<Expr>,
        /// The source position of this term.
        pos: Pos,
    },
    /// A variable use.
    Var { name: Ident, pos: Pos },
    /// A constant integer.
    ConstInt { val: i64, pos: Pos },
    /// A constant of some other primitive type.
    ConstPrim { val: Ident, pos: Pos },
    /// The `(let ((var ty val)*) body)` form.
    Let {
        /// The local bindings, in order.
        defs: Vec<LetDef>,
        /// The expression evaluated with the bindings in scope.
        body: Box<Expr>,
        /// The source position of this let-form.
        pos: Pos,
    },
}
impl Expr {
    /// The source position at which this expression occurs.
    pub fn pos(&self) -> Pos {
        match *self {
            Expr::Let { pos, .. }
            | Expr::Term { pos, .. }
            | Expr::Var { pos, .. }
            | Expr::ConstInt { pos, .. }
            | Expr::ConstPrim { pos, .. } => pos,
        }
    }

    /// Call `f` for each of the terms in this expression.
    pub fn terms(&self, f: &mut dyn FnMut(Pos, &Ident)) {
        match self {
            // A term node reports itself, then recurses into its args.
            Expr::Term { sym, args, pos } => {
                f(*pos, sym);
                for arg in args.iter() {
                    arg.terms(f);
                }
            }
            // A let-form has terms in each bound value and in the body.
            Expr::Let { defs, body, .. } => {
                for def in defs.iter() {
                    def.val.terms(f);
                }
                body.terms(f);
            }
            // Leaves contain no terms.
            Expr::Var { .. } | Expr::ConstInt { .. } | Expr::ConstPrim { .. } => {}
        }
    }
}
/// One variable locally bound in a `(let ...)` expression.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct LetDef {
    /// The bound variable's name.
    pub var: Ident,
    /// The declared type of the bound value.
    pub ty: Ident,
    /// The expression whose result is bound to `var`.
    pub val: Box<Expr>,
    /// The source position of this binding.
    pub pos: Pos,
}
/// An external binding: an extractor or constructor function attached
/// to a term.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Extern {
    /// An external extractor: `(extractor Term rustfunc)` form.
    Extractor {
        /// The term to which this external extractor is attached.
        term: Ident,
        /// The Rust function name.
        func: Ident,
        /// The position of this decl.
        pos: Pos,
        /// Polarity of args: whether values are inputs or outputs to
        /// the external extractor function. This is a sort of
        /// statically-defined approximation to Prolog-style
        /// unification; we allow for the same flexible directionality
        /// but fix it at DSL-definition time. By default, every arg
        /// is an *output* from the extractor (and the "retval", or
        /// more precisely the term value that we are extracting, is
        /// an "input").
        arg_polarity: Option<Vec<ArgPolarity>>,
        /// Infallibility: if an external extractor returns `(T1, T2,
        /// ...)` rather than `Option<(T1, T2, ...)>`, and hence can
        /// never fail, it is declared as such and allows for slightly
        /// better code to be generated.
        infallible: bool,
    },
    /// An external constructor: `(constructor Term rustfunc)` form.
    Constructor {
        /// The term to which this external constructor is attached.
        term: Ident,
        /// The Rust function name.
        func: Ident,
        /// The position of this decl.
        pos: Pos,
    },
    /// An external constant: `(const $IDENT type)` form.
    Const { name: Ident, ty: Ident, pos: Pos },
}
/// Whether an argument is an input or an output. Used in
/// `Extern::Extractor::arg_polarity`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgPolarity {
    /// An arg that must be given an Expr in the pattern and passes data *to*
    /// the extractor op.
    Input,
    /// An arg that must be given a regular pattern (not Expr) and receives data
    /// *from* the extractor op.
    Output,
}

View File

@@ -0,0 +1,852 @@
//! Generate Rust code from a series of Sequences.
use crate::ir::{ExprInst, InstId, PatternInst, Value};
use crate::sema::ExternalSig;
use crate::sema::{TermEnv, TermId, Type, TypeEnv, TypeId, Variant};
use crate::trie::{TrieEdge, TrieNode, TrieSymbol};
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
/// Emit Rust source code for the given type and term environments.
pub fn codegen(typeenv: &TypeEnv, termenv: &TermEnv, tries: &BTreeMap<TermId, TrieNode>) -> String {
    let compiler = Codegen::compile(typeenv, termenv, tries);
    compiler.generate_rust()
}
// Code generator state: borrows the semantic environments and the
// per-term match tries, and emits Rust source from them.
#[derive(Clone, Debug)]
struct Codegen<'a> {
    /// Type environment (type definitions and interned symbols).
    typeenv: &'a TypeEnv,
    /// Term environment (term definitions and signatures).
    termenv: &'a TermEnv,
    /// One match trie per term with an internal constructor.
    functions_by_term: &'a BTreeMap<TermId, TrieNode>,
}
// Per-function-body state tracked while emitting a constructor body.
#[derive(Clone, Debug, Default)]
struct BodyContext {
    /// For each value: (is_ref, ty).
    values: BTreeMap<Value, (bool, TypeId)>,
}
impl<'a> Codegen<'a> {
fn compile(
typeenv: &'a TypeEnv,
termenv: &'a TermEnv,
tries: &'a BTreeMap<TermId, TrieNode>,
) -> Codegen<'a> {
Codegen {
typeenv,
termenv,
functions_by_term: tries,
}
}
fn generate_rust(&self) -> String {
let mut code = String::new();
self.generate_header(&mut code);
self.generate_ctx_trait(&mut code);
self.generate_internal_types(&mut code);
self.generate_internal_term_constructors(&mut code);
code
}
    /// Emit the file header: provenance comments, the lint `allow`
    /// attributes the generated matching code needs, and the glob
    /// import that pulls in external types.
    fn generate_header(&self, code: &mut String) {
        writeln!(code, "// GENERATED BY ISLE. DO NOT EDIT!").unwrap();
        writeln!(code, "//").unwrap();
        writeln!(
            code,
            "// Generated automatically from the instruction-selection DSL code in:",
        )
        .unwrap();
        // List every input DSL file so readers can trace provenance.
        for file in &self.typeenv.filenames {
            writeln!(code, "// - {}", file).unwrap();
        }
        // Generated code trips many lints by construction; silence
        // them wholesale rather than trying to emit lint-clean code.
        writeln!(
            code,
            "\n#![allow(dead_code, unreachable_code, unreachable_patterns)]"
        )
        .unwrap();
        writeln!(
            code,
            "#![allow(unused_imports, unused_variables, non_snake_case)]"
        )
        .unwrap();
        writeln!(code, "#![allow(irrefutable_let_patterns)]").unwrap();
        writeln!(code, "\nuse super::*; // Pulls in all external types.").unwrap();
    }
    /// Emit one `Context` trait method signature for the given
    /// external constructor/extractor signature. Fallible signatures
    /// are wrapped in `Option<...>`; multiple return values become a
    /// tuple; parameters are named `arg0`, `arg1`, ... and are passed
    /// by reference for non-primitive types.
    fn generate_trait_sig(&self, code: &mut String, indent: &str, sig: &ExternalSig) {
        writeln!(
            code,
            "{indent}fn {name}(&mut self, {params}) -> {opt_start}{open_paren}{rets}{close_paren}{opt_end};",
            indent = indent,
            name = sig.func_name,
            params = sig.param_tys
                .iter()
                .enumerate()
                .map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, /* by_ref = */ true)))
                .collect::<Vec<_>>()
                .join(", "),
            opt_start = if sig.infallible { "" } else { "Option<" },
            open_paren = if sig.ret_tys.len() != 1 { "(" } else { "" },
            rets = sig.ret_tys
                .iter()
                .map(|&ty| self.type_name(ty, /* by_ref = */ false))
                .collect::<Vec<_>>()
                .join(", "),
            close_paren = if sig.ret_tys.len() != 1 { ")" } else { "" },
            opt_end = if sig.infallible { "" } else { ">" },
        )
        .unwrap();
    }
    /// Emit the `Context` trait declaration, with one method per
    /// external extractor and external constructor across all terms.
    fn generate_ctx_trait(&self, code: &mut String) {
        writeln!(code, "").unwrap();
        writeln!(
            code,
            "/// Context during lowering: an implementation of this trait"
        )
        .unwrap();
        writeln!(
            code,
            "/// must be provided with all external constructors and extractors."
        )
        .unwrap();
        writeln!(
            code,
            "/// A mutable borrow is passed along through all lowering logic."
        )
        .unwrap();
        writeln!(code, "pub trait Context {{").unwrap();
        for term in &self.termenv.terms {
            // A term may have an external extractor, an external
            // constructor, both, or neither.
            if term.has_external_extractor() {
                let ext_sig = term.extractor_sig(self.typeenv).unwrap();
                self.generate_trait_sig(code, " ", &ext_sig);
            }
            if term.has_external_constructor() {
                let ext_sig = term.constructor_sig(self.typeenv).unwrap();
                self.generate_trait_sig(code, " ", &ext_sig);
            }
        }
        writeln!(code, "}}").unwrap();
    }
    /// Emit Rust `enum` definitions for every internal (non-extern)
    /// enum type in the type environment. Extern and primitive types
    /// are assumed to be brought in by the header's `use super::*;`.
    fn generate_internal_types(&self, code: &mut String) {
        for ty in &self.typeenv.types {
            match ty {
                &Type::Enum {
                    name,
                    is_extern,
                    ref variants,
                    pos,
                    ..
                } if !is_extern => {
                    let name = &self.typeenv.syms[name.index()];
                    writeln!(
                        code,
                        "\n/// Internal type {}: defined at {}.",
                        name,
                        pos.pretty_print_line(&self.typeenv.filenames[..])
                    )
                    .unwrap();
                    writeln!(code, "#[derive(Clone, Debug)]").unwrap();
                    writeln!(code, "pub enum {} {{", name).unwrap();
                    for variant in variants {
                        let name = &self.typeenv.syms[variant.name.index()];
                        // Unit variants get the short form; variants
                        // with fields get named-field syntax.
                        if variant.fields.is_empty() {
                            writeln!(code, " {},", name).unwrap();
                        } else {
                            writeln!(code, " {} {{", name).unwrap();
                            for field in &variant.fields {
                                let name = &self.typeenv.syms[field.name.index()];
                                let ty_name =
                                    self.typeenv.types[field.ty.index()].name(&self.typeenv);
                                writeln!(code, " {}: {},", name, ty_name).unwrap();
                            }
                            writeln!(code, " }},").unwrap();
                        }
                    }
                    writeln!(code, "}}").unwrap();
                }
                _ => {}
            }
        }
    }
fn type_name(&self, typeid: TypeId, by_ref: bool) -> String {
match &self.typeenv.types[typeid.index()] {
&Type::Primitive(_, sym, _) => self.typeenv.syms[sym.index()].clone(),
&Type::Enum { name, .. } => {
let r = if by_ref { "&" } else { "" };
format!("{}{}", r, self.typeenv.syms[name.index()])
}
}
}
fn value_name(&self, value: &Value) -> String {
match value {
&Value::Pattern { inst, output } => format!("pattern{}_{}", inst.index(), output),
&Value::Expr { inst, output } => format!("expr{}_{}", inst.index(), output),
}
}
    /// Whether `ty` is a primitive (as opposed to enum) type.
    fn ty_prim(&self, ty: TypeId) -> bool {
        self.typeenv.types[ty.index()].is_prim()
    }
fn value_binder(&self, value: &Value, is_ref: bool, ty: TypeId) -> String {
let prim = self.ty_prim(ty);
if prim || !is_ref {
format!("{}", self.value_name(value))
} else {
format!("ref {}", self.value_name(value))
}
}
fn value_by_ref(&self, value: &Value, ctx: &BodyContext) -> String {
let raw_name = self.value_name(value);
let &(is_ref, ty) = ctx.values.get(value).unwrap();
let prim = self.ty_prim(ty);
if is_ref || prim {
raw_name
} else {
format!("&{}", raw_name)
}
}
fn value_by_val(&self, value: &Value, ctx: &BodyContext) -> String {
let raw_name = self.value_name(value);
let &(is_ref, _) = ctx.values.get(value).unwrap();
if is_ref {
format!("{}.clone()", raw_name)
} else {
raw_name
}
}
    /// Record `value` in the body context. `is_ref` is only honored
    /// for non-primitive types; primitives are always tracked by
    /// value.
    fn define_val(&self, value: &Value, ctx: &mut BodyContext, is_ref: bool, ty: TypeId) {
        let is_ref = !self.ty_prim(ty) && is_ref;
        ctx.values.insert(value.clone(), (is_ref, ty));
    }
fn const_int(&self, val: i64, ty: TypeId) -> String {
let is_bool = match &self.typeenv.types[ty.index()] {
&Type::Primitive(_, name, _) => &self.typeenv.syms[name.index()] == "bool",
_ => unreachable!(),
};
if is_bool {
format!("{}", val != 0)
} else {
format!("{}", val)
}
}
    /// Emit one `pub fn <name><C: Context>(ctx: &mut C, ...) ->
    /// Option<...>` per term with an internal constructor, whose body
    /// is generated from the term's match trie.
    fn generate_internal_term_constructors(&self, code: &mut String) {
        for (&termid, trie) in self.functions_by_term {
            let termdata = &self.termenv.terms[termid.index()];
            // Skip terms that are enum variants or that have external
            // constructors/extractors.
            if !termdata.has_constructor() || termdata.has_external_constructor() {
                continue;
            }
            let sig = termdata.constructor_sig(self.typeenv).unwrap();
            let args = sig
                .param_tys
                .iter()
                .enumerate()
                .map(|(i, &ty)| format!("arg{}: {}", i, self.type_name(ty, true)))
                .collect::<Vec<_>>()
                .join(", ");
            // Internal constructors always produce exactly one value.
            assert_eq!(sig.ret_tys.len(), 1);
            let ret = self.type_name(sig.ret_tys[0], false);
            writeln!(
                code,
                "\n// Generated as internal constructor for term {}.",
                self.typeenv.syms[termdata.name.index()],
            )
            .unwrap();
            writeln!(
                code,
                "pub fn {}<C: Context>(ctx: &mut C, {}) -> Option<{}> {{",
                sig.func_name, args, ret,
            )
            .unwrap();
            let mut body_ctx: BodyContext = Default::default();
            // If no rule matched and returned, fall through to `None`.
            let returned =
                self.generate_body(code, /* depth = */ 0, trie, " ", &mut body_ctx);
            if !returned {
                writeln!(code, " return None;").unwrap();
            }
            writeln!(code, "}}").unwrap();
        }
    }
    /// Emit code for a single RHS (expression) instruction at `indent`,
    /// defining any value it produces in `ctx`. `Return` instructions
    /// do not emit code directly; they record the (index, expr) pair in
    /// `returns` for the caller to emit.
    fn generate_expr_inst(
        &self,
        code: &mut String,
        id: InstId,
        inst: &ExprInst,
        indent: &str,
        ctx: &mut BodyContext,
        returns: &mut Vec<(usize, String)>,
    ) {
        log::trace!("generate_expr_inst: {:?}", inst);
        match inst {
            // `let exprN_0: Ty = <literal>;`
            &ExprInst::ConstInt { ty, val } => {
                let value = Value::Expr {
                    inst: id,
                    output: 0,
                };
                self.define_val(&value, ctx, /* is_ref = */ false, ty);
                let name = self.value_name(&value);
                let ty_name = self.type_name(ty, /* by_ref = */ false);
                writeln!(
                    code,
                    "{}let {}: {} = {};",
                    indent,
                    name,
                    ty_name,
                    self.const_int(val, ty)
                )
                .unwrap();
            }
            // `let exprN_0: Ty = <named primitive constant>;`
            &ExprInst::ConstPrim { ty, val } => {
                let value = Value::Expr {
                    inst: id,
                    output: 0,
                };
                self.define_val(&value, ctx, /* is_ref = */ false, ty);
                let name = self.value_name(&value);
                let ty_name = self.type_name(ty, /* by_ref = */ false);
                writeln!(
                    code,
                    "{}let {}: {} = {};",
                    indent,
                    name,
                    ty_name,
                    self.typeenv.syms[val.index()],
                )
                .unwrap();
            }
            // `let exprN_0 = Ty::Variant { field: val, ... };`
            &ExprInst::CreateVariant {
                ref inputs,
                ty,
                variant,
            } => {
                let variantinfo = match &self.typeenv.types[ty.index()] {
                    &Type::Primitive(..) => panic!("CreateVariant with primitive type"),
                    &Type::Enum { ref variants, .. } => &variants[variant.index()],
                };
                // Inputs are in the same order as the variant's fields.
                let mut input_fields = vec![];
                for ((input_value, _), field) in inputs.iter().zip(variantinfo.fields.iter()) {
                    let field_name = &self.typeenv.syms[field.name.index()];
                    let value_expr = self.value_by_val(input_value, ctx);
                    input_fields.push(format!("{}: {}", field_name, value_expr));
                }
                let output = Value::Expr {
                    inst: id,
                    output: 0,
                };
                let outputname = self.value_name(&output);
                let full_variant_name = format!(
                    "{}::{}",
                    self.type_name(ty, false),
                    self.typeenv.syms[variantinfo.name.index()]
                );
                // Unit variants use the short form; others use
                // named-field struct syntax.
                if input_fields.is_empty() {
                    writeln!(
                        code,
                        "{}let {} = {};",
                        indent, outputname, full_variant_name
                    )
                    .unwrap();
                } else {
                    writeln!(
                        code,
                        "{}let {} = {} {{",
                        indent, outputname, full_variant_name
                    )
                    .unwrap();
                    for input_field in input_fields {
                        writeln!(code, "{} {},", indent, input_field).unwrap();
                    }
                    writeln!(code, "{}}};", indent).unwrap();
                }
                self.define_val(&output, ctx, /* is_ref = */ false, ty);
            }
            // `let exprN_0 = <constructor>(ctx, args...)?;` (the `?`
            // is omitted for infallible constructors).
            &ExprInst::Construct {
                ref inputs,
                term,
                infallible,
                ..
            } => {
                let mut input_exprs = vec![];
                for (input_value, input_ty) in inputs {
                    // Primitives pass by value; enums by reference.
                    let value_expr = if self.typeenv.types[input_ty.index()].is_prim() {
                        self.value_by_val(input_value, ctx)
                    } else {
                        self.value_by_ref(input_value, ctx)
                    };
                    input_exprs.push(value_expr);
                }
                let output = Value::Expr {
                    inst: id,
                    output: 0,
                };
                let outputname = self.value_name(&output);
                let termdata = &self.termenv.terms[term.index()];
                let sig = termdata.constructor_sig(self.typeenv).unwrap();
                assert_eq!(input_exprs.len(), sig.param_tys.len());
                let fallible_try = if infallible { "" } else { "?" };
                writeln!(
                    code,
                    "{}let {} = {}(ctx, {}){};",
                    indent,
                    outputname,
                    sig.full_name,
                    input_exprs.join(", "),
                    fallible_try,
                )
                .unwrap();
                self.define_val(&output, ctx, /* is_ref = */ false, termdata.ret_ty);
            }
            // Record the return value for the caller; no code emitted.
            &ExprInst::Return {
                index, ref value, ..
            } => {
                let value_expr = self.value_by_val(value, ctx);
                returns.push((index, value_expr));
            }
        }
    }
    /// Produce the `field: binder` strings for matching the fields of
    /// `variant`, defining one pattern value per field in `ctx`.
    fn match_variant_binders(
        &self,
        variant: &Variant,
        arg_tys: &[TypeId],
        id: InstId,
        ctx: &mut BodyContext,
    ) -> Vec<String> {
        arg_tys
            .iter()
            .zip(variant.fields.iter())
            .enumerate()
            .map(|(i, (&ty, field))| {
                let value = Value::Pattern {
                    inst: id,
                    output: i,
                };
                // NOTE(review): the binder is emitted with
                // `is_ref = true` (so non-primitive fields bind via
                // `ref`) but the value is recorded with
                // `is_ref = false` and `field.ty` rather than `ty` —
                // confirm this asymmetry is intended.
                let valuename = self.value_binder(&value, /* is_ref = */ true, ty);
                let fieldname = &self.typeenv.syms[field.name.index()];
                self.define_val(&value, ctx, /* is_ref = */ false, field.ty);
                format!("{}: {}", fieldname, valuename)
            })
            .collect::<Vec<_>>()
    }
    /// Emit code for a single LHS (pattern) instruction at `indent`,
    /// defining any values it produces in `ctx`.
    ///
    /// Returns a `bool` indicating whether this pattern inst is
    /// infallible. Fallible insts open an `if`/`if let` scope that the
    /// caller is responsible for closing with a matching `}`.
    fn generate_pattern_inst(
        &self,
        code: &mut String,
        id: InstId,
        inst: &PatternInst,
        indent: &str,
        ctx: &mut BodyContext,
    ) -> bool {
        match inst {
            // Bind the Nth function argument; always succeeds.
            &PatternInst::Arg { index, ty } => {
                let output = Value::Pattern {
                    inst: id,
                    output: 0,
                };
                let outputname = self.value_name(&output);
                // Non-primitive args arrive by reference (see the
                // constructor signature generation).
                let is_ref = match &self.typeenv.types[ty.index()] {
                    &Type::Primitive(..) => false,
                    _ => true,
                };
                writeln!(code, "{}let {} = arg{};", indent, outputname, index).unwrap();
                self.define_val(
                    &Value::Pattern {
                        inst: id,
                        output: 0,
                    },
                    ctx,
                    is_ref,
                    ty,
                );
                true
            }
            // `if a == b {` — fallible equality test.
            &PatternInst::MatchEqual { ref a, ref b, .. } => {
                let a = self.value_by_ref(a, ctx);
                let b = self.value_by_ref(b, ctx);
                writeln!(code, "{}if {} == {} {{", indent, a, b).unwrap();
                false
            }
            // `if input == <int literal> {`
            &PatternInst::MatchInt {
                ref input, int_val, ..
            } => {
                let input = self.value_by_val(input, ctx);
                writeln!(code, "{}if {} == {} {{", indent, input, int_val).unwrap();
                false
            }
            // `if input == <named primitive constant> {`
            &PatternInst::MatchPrim { ref input, val, .. } => {
                let input = self.value_by_val(input, ctx);
                let sym = &self.typeenv.syms[val.index()];
                writeln!(code, "{}if {} == {} {{", indent, input, sym).unwrap();
                false
            }
            // `if let &Ty::Variant { field: binder, .. } = input {`
            &PatternInst::MatchVariant {
                ref input,
                input_ty,
                variant,
                ref arg_tys,
            } => {
                let input = self.value_by_ref(input, ctx);
                let variants = match &self.typeenv.types[input_ty.index()] {
                    &Type::Primitive(..) => panic!("primitive type input to MatchVariant"),
                    &Type::Enum { ref variants, .. } => variants,
                };
                let ty_name = self.type_name(input_ty, /* is_ref = */ true);
                let variant = &variants[variant.index()];
                let variantname = &self.typeenv.syms[variant.name.index()];
                let args = self.match_variant_binders(variant, &arg_tys[..], id, ctx);
                let args = if args.is_empty() {
                    "".to_string()
                } else {
                    format!("{{ {} }}", args.join(", "))
                };
                writeln!(
                    code,
                    "{}if let {}::{} {} = {} {{",
                    indent, ty_name, variantname, args, input
                )
                .unwrap();
                false
            }
            // External extractor call: `let (outs...) = f(ctx, ins...);`
            // when infallible, or `if let Some((outs...)) = ... {`
            // otherwise.
            &PatternInst::Extract {
                ref inputs,
                ref output_tys,
                term,
                infallible,
                ..
            } => {
                let termdata = &self.termenv.terms[term.index()];
                let sig = termdata.extractor_sig(self.typeenv).unwrap();
                let input_values = inputs
                    .iter()
                    .map(|input| self.value_by_ref(input, ctx))
                    .collect::<Vec<_>>();
                let output_binders = output_tys
                    .iter()
                    .enumerate()
                    .map(|(i, &ty)| {
                        let output_val = Value::Pattern {
                            inst: id,
                            output: i,
                        };
                        self.define_val(&output_val, ctx, /* is_ref = */ false, ty);
                        self.value_binder(&output_val, /* is_ref = */ false, ty)
                    })
                    .collect::<Vec<_>>();
                if infallible {
                    writeln!(
                        code,
                        "{indent}let {open_paren}{vars}{close_paren} = {name}(ctx, {args});",
                        indent = indent,
                        open_paren = if output_binders.len() == 1 { "" } else { "(" },
                        vars = output_binders.join(", "),
                        close_paren = if output_binders.len() == 1 { "" } else { ")" },
                        name = sig.full_name,
                        args = input_values.join(", "),
                    )
                    .unwrap();
                    true
                } else {
                    writeln!(
                        code,
                        "{indent}if let Some({open_paren}{vars}{close_paren}) = {name}(ctx, {args}) {{",
                        indent = indent,
                        open_paren = if output_binders.len() == 1 { "" } else { "(" },
                        vars = output_binders.join(", "),
                        close_paren = if output_binders.len() == 1 { "" } else { ")" },
                        name = sig.full_name,
                        args = input_values.join(", "),
                    )
                    .unwrap();
                    false
                }
            }
            // A nested expression that is just a constant int: inline
            // it as a `let`, no closure needed; always succeeds.
            &PatternInst::Expr {
                ref seq, output_ty, ..
            } if seq.is_const_int().is_some() => {
                let (ty, val) = seq.is_const_int().unwrap();
                assert_eq!(ty, output_ty);
                let output = Value::Pattern {
                    inst: id,
                    output: 0,
                };
                writeln!(
                    code,
                    "{}let {} = {};",
                    indent,
                    self.value_name(&output),
                    self.const_int(val, ty),
                )
                .unwrap();
                self.define_val(&output, ctx, /* is_ref = */ false, ty);
                true
            }
            // General nested expression: wrap it in a closure and test
            // its `Option` result with `if let Some(..)`.
            &PatternInst::Expr {
                ref seq, output_ty, ..
            } => {
                let closure_name = format!("closure{}", id.index());
                writeln!(code, "{}let {} = || {{", indent, closure_name).unwrap();
                let subindent = format!("{} ", indent);
                let mut subctx = ctx.clone();
                let mut returns = vec![];
                for (id, inst) in seq.insts.iter().enumerate() {
                    let id = InstId(id);
                    self.generate_expr_inst(code, id, inst, &subindent, &mut subctx, &mut returns);
                }
                assert_eq!(returns.len(), 1);
                writeln!(code, "{}return Some({});", subindent, returns[0].1).unwrap();
                writeln!(code, "{}}};", indent).unwrap();
                let output = Value::Pattern {
                    inst: id,
                    output: 0,
                };
                writeln!(
                    code,
                    "{}if let Some({}) = {}() {{",
                    indent,
                    self.value_binder(&output, /* is_ref = */ false, output_ty),
                    closure_name
                )
                .unwrap();
                self.define_val(&output, ctx, /* is_ref = */ false, output_ty);
                false
            }
        }
    }
    /// Emit the body for one trie node. Returns whether the emitted
    /// code unconditionally returns (so the caller can skip emitting a
    /// fallback `return None`).
    fn generate_body(
        &self,
        code: &mut String,
        depth: usize,
        trie: &TrieNode,
        indent: &str,
        ctx: &mut BodyContext,
    ) -> bool {
        log::trace!("generate_body:\n{}", trie.pretty());
        let mut returned = false;
        match trie {
            &TrieNode::Empty => {}
            &TrieNode::Leaf { ref output, .. } => {
                writeln!(
                    code,
                    "{}// Rule at {}.",
                    indent,
                    output.pos.pretty_print_line(&self.typeenv.filenames[..])
                )
                .unwrap();
                // If this is a leaf node, generate the ExprSequence and return.
                let mut returns = vec![];
                for (id, inst) in output.insts.iter().enumerate() {
                    let id = InstId(id);
                    self.generate_expr_inst(code, id, inst, indent, ctx, &mut returns);
                }
                assert_eq!(returns.len(), 1);
                writeln!(code, "{}return Some({});", indent, returns[0].1).unwrap();
                returned = true;
            }
            &TrieNode::Decision { ref edges } => {
                let subindent = format!("{} ", indent);
                // if this is a decision node, generate each match op
                // in turn (in priority order). Sort the ops within
                // each priority, and gather together adjacent
                // MatchVariant ops with the same input and disjoint
                // variants in order to create a `match` rather than a
                // chain of if-lets.
                let mut edges = edges.clone();
                edges.sort_by(|e1, e2| {
                    (-e1.range.min, &e1.symbol).cmp(&(-e2.range.min, &e2.symbol))
                });
                let mut i = 0;
                while i < edges.len() {
                    // Gather adjacent match variants so that we can turn these
                    // into a `match` rather than a sequence of `if let`s.
                    let mut last = i;
                    let mut adjacent_variants = BTreeSet::new();
                    let mut adjacent_variant_input = None;
                    log::trace!(
                        "edge: range = {:?}, symbol = {:?}",
                        edges[i].range,
                        edges[i].symbol
                    );
                    while last < edges.len() {
                        match &edges[last].symbol {
                            &TrieSymbol::Match {
                                op: PatternInst::MatchVariant { input, variant, .. },
                            } => {
                                if adjacent_variant_input.is_none() {
                                    adjacent_variant_input = Some(input);
                                }
                                // Extend the run only while the input
                                // stays the same and the variant is new.
                                if adjacent_variant_input == Some(input)
                                    && !adjacent_variants.contains(&variant)
                                {
                                    adjacent_variants.insert(variant);
                                    last += 1;
                                } else {
                                    break;
                                }
                            }
                            _ => {
                                break;
                            }
                        }
                    }
                    // Now `edges[i..last]` is a run of adjacent `MatchVariants`
                    // (possibly an empty one). Only use a `match` form if there
                    // are at least two adjacent options.
                    if last - i > 1 {
                        self.generate_body_matches(code, depth, &edges[i..last], indent, ctx);
                        i = last;
                        continue;
                    } else {
                        let &TrieEdge {
                            ref symbol,
                            ref node,
                            ..
                        } = &edges[i];
                        i += 1;
                        match symbol {
                            &TrieSymbol::EndOfMatch => {
                                returned = self.generate_body(code, depth + 1, node, indent, ctx);
                            }
                            &TrieSymbol::Match { ref op } => {
                                let id = InstId(depth);
                                let infallible =
                                    self.generate_pattern_inst(code, id, op, indent, ctx);
                                // Fallible ops opened a scope; nest one
                                // level deeper and close it after.
                                let i = if infallible { indent } else { &subindent[..] };
                                let sub_returned =
                                    self.generate_body(code, depth + 1, node, i, ctx);
                                if !infallible {
                                    writeln!(code, "{}}}", indent).unwrap();
                                }
                                // An infallible op whose subtree always
                                // returns makes the rest unreachable.
                                if infallible && sub_returned {
                                    returned = true;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
        }
        returned
    }
    /// Emit a single `match` expression for a run of adjacent
    /// `MatchVariant` edges that all test the same input value, with
    /// one arm per edge plus a catchall.
    fn generate_body_matches(
        &self,
        code: &mut String,
        depth: usize,
        edges: &[TrieEdge],
        indent: &str,
        ctx: &mut BodyContext,
    ) {
        // All edges share the same input/type; read them off the first.
        let (input, input_ty) = match &edges[0].symbol {
            &TrieSymbol::Match {
                op:
                    PatternInst::MatchVariant {
                        input, input_ty, ..
                    },
            } => (input, input_ty),
            _ => unreachable!(),
        };
        let (input_ty_sym, variants) = match &self.typeenv.types[input_ty.index()] {
            &Type::Enum {
                ref name,
                ref variants,
                ..
            } => (name, variants),
            _ => unreachable!(),
        };
        let input_ty_name = &self.typeenv.syms[input_ty_sym.index()];
        // Emit the `match`.
        writeln!(
            code,
            "{}match {} {{",
            indent,
            self.value_by_ref(&input, ctx)
        )
        .unwrap();
        // Emit each case.
        for &TrieEdge {
            ref symbol,
            ref node,
            ..
        } in edges
        {
            let id = InstId(depth);
            let (variant, arg_tys) = match symbol {
                &TrieSymbol::Match {
                    op:
                        PatternInst::MatchVariant {
                            variant,
                            ref arg_tys,
                            ..
                        },
                } => (variant, arg_tys),
                _ => unreachable!(),
            };
            let variantinfo = &variants[variant.index()];
            let variantname = &self.typeenv.syms[variantinfo.name.index()];
            let fields = self.match_variant_binders(variantinfo, arg_tys, id, ctx);
            let fields = if fields.is_empty() {
                "".to_string()
            } else {
                format!("{{ {} }}", fields.join(", "))
            };
            writeln!(
                code,
                "{} &{}::{} {} => {{",
                indent, input_ty_name, variantname, fields,
            )
            .unwrap();
            let subindent = format!("{} ", indent);
            self.generate_body(code, depth + 1, node, &subindent, ctx);
            writeln!(code, "{} }}", indent).unwrap();
        }
        // Always add a catchall, because we don't do exhaustiveness
        // checking on the MatchVariant ops.
        writeln!(code, "{} _ => {{}}", indent).unwrap();
        writeln!(code, "{}}}", indent).unwrap();
    }
}

View File

@@ -0,0 +1,12 @@
//! Compilation process, from AST to Sema to Sequences of Insts.
use crate::error::Result;
use crate::{ast, codegen, sema, trie};
/// Compile the given AST definitions into Rust source code.
pub fn compile(defs: &ast::Defs) -> Result<String> {
    // Type-check the definitions, lower rules into per-term match
    // tries, and finally emit Rust from the tries.
    let mut type_env = sema::TypeEnv::from_ast(defs)?;
    let term_env = sema::TermEnv::from_ast(&mut type_env, defs)?;
    let tries = trie::build_tries(&type_env, &term_env);
    Ok(codegen::codegen(&type_env, &term_env, &tries))
}

View File

@@ -0,0 +1,148 @@
//! Error types.
use miette::{Diagnostic, SourceCode, SourceSpan};
use std::sync::Arc;
/// Either `Ok(T)` or `Err(isle::Error)`.
///
/// Crate-wide shorthand for fallible operations.
pub type Result<T> = std::result::Result<T, Error>;
/// Errors produced by ISLE.
///
/// Implements `miette::Diagnostic` so parse/type errors can be
/// rendered with source snippets and span labels.
#[derive(thiserror::Error, Diagnostic, Clone, Debug)]
pub enum Error {
    /// An I/O error.
    #[error("{context}")]
    IoError {
        /// The underlying I/O error.
        // `Arc` because `std::io::Error` is not `Clone` but this enum is.
        #[source]
        error: Arc<std::io::Error>,
        /// The context explaining what caused the I/O error.
        context: String,
    },

    /// The input ISLE source has a parse error.
    #[error("parse error: {msg}")]
    #[diagnostic()]
    ParseError {
        /// The error message.
        msg: String,

        /// The input ISLE source.
        #[source_code]
        src: Source,

        /// The location of the parse error.
        #[label("{msg}")]
        span: SourceSpan,
    },

    /// The input ISLE source has a type error.
    #[error("type error: {msg}")]
    #[diagnostic()]
    TypeError {
        /// The error message.
        msg: String,

        /// The input ISLE source.
        #[source_code]
        src: Source,

        /// The location of the type error.
        #[label("{msg}")]
        span: SourceSpan,
    },

    /// Multiple errors.
    #[error("Found {} errors:\n\n{}",
            self.unwrap_errors().len(),
            DisplayErrors(self.unwrap_errors()))]
    #[diagnostic()]
    Errors(#[related] Vec<Error>),
}
impl Error {
    /// Create an `isle::Error` from the given I/O error and context.
    pub fn from_io(error: std::io::Error, context: impl Into<String>) -> Self {
        Error::IoError {
            error: Arc::new(error),
            context: context.into(),
        }
    }
}
impl From<Vec<Error>> for Error {
fn from(es: Vec<Error>) -> Self {
Error::Errors(es)
}
}
impl Error {
    /// The inner error list of an `Error::Errors`; panics on any other
    /// variant (used only from the `Errors` variant's own
    /// `#[error(...)]` format expression, where the variant is known).
    fn unwrap_errors(&self) -> &[Error] {
        match self {
            Error::Errors(e) => e,
            _ => panic!("`isle::Error::unwrap_errors` on non-`isle::Error::Errors`"),
        }
    }
}
/// Display adapter that renders a slice of errors one per line; used
/// by the `Error::Errors` variant's message format.
struct DisplayErrors<'a>(&'a [Error]);

impl std::fmt::Display for DisplayErrors<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.iter().try_for_each(|e| writeln!(f, "{}", e))
    }
}
/// A source file and its contents.
#[derive(Clone)]
pub struct Source {
    /// The file name. `Arc<str>` keeps clones cheap.
    name: Arc<str>,
    /// The full text of the file.
    text: Arc<str>,
}
impl std::fmt::Debug for Source {
    /// Debug-print the source's name while redacting its (potentially
    /// large) text contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Bug fix: the `DebugStruct` builder was previously dropped
        // without calling `finish()`, so the closing brace was never
        // written and any formatting error was silently discarded.
        f.debug_struct("Source")
            .field("name", &self.name)
            .field("source", &"<redacted>")
            .finish()
    }
}
impl Source {
    /// Create a `Source` from a file name and its text contents.
    pub(crate) fn new(name: Arc<str>, text: Arc<str>) -> Self {
        Self { name, text }
    }

    /// Get this source's file name.
    pub fn name(&self) -> &Arc<str> {
        &self.name
    }

    /// Get this source's text contents.
    pub fn text(&self) -> &Arc<str> {
        // Bug fix: this previously returned `&self.name` (a
        // copy/paste error), handing callers the file name instead of
        // the file contents.
        &self.text
    }
}
/// Let miette read spans out of this source for diagnostic rendering.
impl SourceCode for Source {
    fn read_span<'a>(
        &'a self,
        span: &SourceSpan,
        context_lines_before: usize,
        context_lines_after: usize,
    ) -> std::result::Result<Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
        // Delegate span extraction to the `str` impl, then re-wrap the
        // result so it carries this source's file name.
        let contents = self
            .text
            .read_span(span, context_lines_before, context_lines_after)?;
        Ok(Box::new(miette::MietteSpanContents::new_named(
            self.name.to_string(),
            contents.data(),
            contents.span().clone(),
            contents.line(),
            contents.column(),
            contents.line_count(),
        )))
    }
}

View File

@@ -0,0 +1,685 @@
//! Lowered matching IR.
use crate::lexer::Pos;
use crate::sema::*;
use std::collections::BTreeMap;
// `declare_id!` is a project-local macro generating a newtype index
// wrapper (presumably with the usual derives — see its definition).
declare_id!(
    /// The id of an instruction in a `PatternSequence`.
    InstId
);
/// A value produced by a LHS or RHS instruction.
///
/// The two variants exist so pattern-produced and expr-produced values
/// live in distinct namespaces even when instruction ids collide.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Value {
    /// A value produced by an instruction in the Pattern (LHS).
    Pattern {
        /// The instruction that produces this value.
        inst: InstId,
        /// This value is the `output`th value produced by this pattern.
        output: usize,
    },
    /// A value produced by an instruction in the Expr (RHS).
    Expr {
        /// The instruction that produces this value.
        inst: InstId,
        /// This value is the `output`th value produced by this expression.
        output: usize,
    },
}
/// A single Pattern instruction.
///
/// NOTE: variant order is significant — `Ord` is derived, and the
/// trie-building/sorting logic relies on it (see the NB comments on
/// `Extract` and `Arg` below).
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum PatternInst {
    /// Match a value as equal to another value. Produces no values.
    MatchEqual {
        /// The first value.
        a: Value,
        /// The second value.
        b: Value,
        /// The type of the values.
        ty: TypeId,
    },

    /// Try matching the given value as the given integer. Produces no values.
    MatchInt {
        /// The value to match on.
        input: Value,
        /// The value's type.
        ty: TypeId,
        /// The integer to match against the value.
        int_val: i64,
    },

    /// Try matching the given value as the given constant. Produces no values.
    MatchPrim {
        /// The value to match on.
        input: Value,
        /// The type of the value.
        ty: TypeId,
        /// The primitive to match against the value.
        val: Sym,
    },

    /// Try matching the given value as the given variant, producing `|arg_tys|`
    /// values as output.
    MatchVariant {
        /// The value to match on.
        input: Value,
        /// The type of the value.
        input_ty: TypeId,
        /// The types of values produced upon a successful match.
        arg_tys: Vec<TypeId>,
        /// The value type's variant that we are matching against.
        variant: VariantId,
    },

    /// Evaluate an expression and provide the given value as the result of this
    /// match instruction. The expression has access to the pattern-values up to
    /// this point in the sequence.
    Expr {
        /// The expression to evaluate.
        seq: ExprSequence,
        /// The value produced by the expression.
        output: Value,
        /// The type of the output value.
        output_ty: TypeId,
    },

    // NB: this has to come second-to-last, because it might be infallible, for
    // the same reasons that `Arg` has to be last.
    //
    /// Invoke an extractor, taking the given values as input (the first is the
    /// value to extract, the other are the `Input`-polarity extractor args) and
    /// producing an output value for each `Output`-polarity extractor arg.
    Extract {
        /// The value to extract, followed by polarity extractor args.
        inputs: Vec<Value>,
        /// The types of the inputs.
        input_tys: Vec<TypeId>,
        /// The types of the output values produced upon a successful match.
        output_tys: Vec<TypeId>,
        /// This extractor's term.
        term: TermId,
        /// Whether this extraction is infallible or not.
        infallible: bool,
    },

    // NB: This has to go last, since it is infallible, so that when we sort
    // edges in the trie, we visit infallible edges after first having tried the
    // more-specific fallible options.
    //
    /// Get the Nth input argument, which corresponds to the Nth field
    /// of the root term.
    Arg {
        /// The index of the argument to get.
        index: usize,
        /// The type of the argument.
        ty: TypeId,
    },
}
/// A single Expr instruction.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ExprInst {
    /// Produce a constant integer.
    ConstInt {
        /// This integer type.
        ty: TypeId,
        /// The integer value. Must fit within the type.
        val: i64,
    },

    /// Produce a constant extern value.
    ConstPrim {
        /// The primitive type.
        ty: TypeId,
        /// The primitive value.
        val: Sym,
    },

    /// Create a variant.
    CreateVariant {
        /// The input arguments that will make up this variant's fields.
        ///
        /// These must be in the same order as the variant's fields.
        inputs: Vec<(Value, TypeId)>,
        /// The enum type.
        ty: TypeId,
        /// The variant within the enum that we are constructing.
        variant: VariantId,
    },

    /// Invoke a constructor.
    Construct {
        /// The arguments to the constructor.
        inputs: Vec<(Value, TypeId)>,
        /// The type of the constructor.
        ty: TypeId,
        /// The constructor term.
        term: TermId,
        /// Whether this constructor is infallible or not.
        infallible: bool,
    },

    /// Set the Nth return value. Produces no values.
    Return {
        /// The index of the return value to set.
        index: usize,
        /// The type of the return value.
        ty: TypeId,
        /// The value to set as the `index`th return value.
        value: Value,
    },
}
impl ExprInst {
    /// Invoke `f` for each value in this expression.
    pub fn visit_values<F: FnMut(Value)>(&self, mut f: F) {
        match self {
            // Both construction forms consume a list of input values.
            ExprInst::Construct { inputs, .. } | ExprInst::CreateVariant { inputs, .. } => {
                for (input, _ty) in inputs {
                    f(*input);
                }
            }
            // A return consumes exactly one value.
            ExprInst::Return { value, .. } => f(*value),
            // Constants consume no values.
            ExprInst::ConstInt { .. } | ExprInst::ConstPrim { .. } => {}
        }
    }
}
/// A linear sequence of instructions that match on and destructure an
/// argument. A pattern is fallible (may not match). If it does not fail, its
/// result consists of the values produced by the `PatternInst`s, which may be
/// used by a subsequent `Expr`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
pub struct PatternSequence {
    /// Instruction sequence for pattern.
    ///
    /// `InstId` indexes into this sequence for `Value::Pattern` values.
    pub insts: Vec<PatternInst>,
}
/// A linear sequence of instructions that produce a new value from the
/// right-hand side of a rule, given bindings that come from a `Pattern` derived
/// from the left-hand side.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default, PartialOrd, Ord)]
pub struct ExprSequence {
    /// Instruction sequence for expression.
    ///
    /// `InstId` indexes into this sequence for `Value::Expr` values.
    pub insts: Vec<ExprInst>,
    /// Position at which the rule producing this sequence was located.
    ///
    /// Set by `lower_rule` from the originating rule's source position.
    pub pos: Pos,
}
impl ExprSequence {
    /// Is this expression sequence producing a constant integer?
    ///
    /// If so, return the integer type and the constant.
    pub fn is_const_int(&self) -> Option<(TypeId, i64)> {
        // A constant-integer sequence is exactly a `ConstInt` immediately
        // followed by a `Return`.
        match self.insts.as_slice() {
            [ExprInst::ConstInt { ty, val }, ExprInst::Return { .. }] => Some((*ty, *val)),
            _ => None,
        }
    }
}
/// The input to `PatternSequence::gen_pattern`: either an already-computed
/// value, or the implicit argument list of the root term.
#[derive(Clone, Copy, Debug)]
enum ValueOrArgs {
    /// A concrete value produced by an earlier pattern instruction.
    Value(Value),
    /// The root term's arguments, accessed via `PatternInst::Arg` when the
    /// root pattern is matched.
    ImplicitTermFromArgs(TermId),
}
impl ValueOrArgs {
    /// The wrapped value, or `None` for the implicit-root-term case.
    fn to_value(&self) -> Option<Value> {
        if let ValueOrArgs::Value(v) = *self {
            Some(v)
        } else {
            None
        }
    }
}
impl PatternSequence {
    /// Append `inst` to this sequence and return its id.
    fn add_inst(&mut self, inst: PatternInst) -> InstId {
        let id = InstId(self.insts.len());
        self.insts.push(inst);
        id
    }

    /// Add an instruction fetching the `index`th argument of the root term,
    /// returning its (single) output value.
    fn add_arg(&mut self, index: usize, ty: TypeId) -> Value {
        let inst = self.add_inst(PatternInst::Arg { index, ty });
        Value::Pattern { inst, output: 0 }
    }

    /// Require that `a` and `b` (both of type `ty`) are equal.
    fn add_match_equal(&mut self, a: Value, b: Value, ty: TypeId) {
        self.add_inst(PatternInst::MatchEqual { a, b, ty });
    }

    /// Require that `input` is the constant integer `int_val`.
    fn add_match_int(&mut self, input: Value, ty: TypeId, int_val: i64) {
        self.add_inst(PatternInst::MatchInt { input, ty, int_val });
    }

    /// Require that `input` is the constant primitive `val`.
    fn add_match_prim(&mut self, input: Value, ty: TypeId, val: Sym) {
        self.add_inst(PatternInst::MatchPrim { input, ty, val });
    }

    /// Match `input` against the given enum `variant`, producing one output
    /// value per variant field.
    fn add_match_variant(
        &mut self,
        input: Value,
        input_ty: TypeId,
        arg_tys: &[TypeId],
        variant: VariantId,
    ) -> Vec<Value> {
        let num_outputs = arg_tys.len();
        let inst = self.add_inst(PatternInst::MatchVariant {
            input,
            input_ty,
            arg_tys: arg_tys.to_vec(),
            variant,
        });
        (0..num_outputs)
            .map(|output| Value::Pattern { inst, output })
            .collect()
    }

    /// Invoke an extractor for `term`, producing one output value per entry
    /// in `output_tys`.
    fn add_extract(
        &mut self,
        inputs: Vec<Value>,
        input_tys: Vec<TypeId>,
        output_tys: Vec<TypeId>,
        term: TermId,
        infallible: bool,
    ) -> Vec<Value> {
        let num_outputs = output_tys.len();
        let inst = self.add_inst(PatternInst::Extract {
            inputs,
            input_tys,
            output_tys,
            term,
            infallible,
        });
        (0..num_outputs)
            .map(|output| Value::Pattern { inst, output })
            .collect()
    }

    /// Embed an expression sequence (used to compute an extractor's `in`
    /// argument) into the pattern, returning its single output value.
    fn add_expr_seq(&mut self, seq: ExprSequence, output: Value, output_ty: TypeId) -> Value {
        let inst = self.add_inst(PatternInst::Expr {
            seq,
            output,
            output_ty,
        });
        // An embedded expression produces exactly one output.
        Value::Pattern { inst, output: 0 }
    }

    /// Generate PatternInsts to match the given (sub)pattern. Works
    /// recursively down the AST.
    ///
    /// `input` is the value (or, for the root pattern, the implicit argument
    /// list) that `pat` is matched against; `vars` accumulates bindings for
    /// pattern variables so that the expression side can refer to them.
    fn gen_pattern(
        &mut self,
        input: ValueOrArgs,
        typeenv: &TypeEnv,
        termenv: &TermEnv,
        pat: &Pattern,
        vars: &mut BTreeMap<VarId, Value>,
    ) {
        match pat {
            &Pattern::BindPattern(_ty, var, ref subpat) => {
                // Bind the appropriate variable and recurse.
                assert!(!vars.contains_key(&var));
                if let Some(v) = input.to_value() {
                    vars.insert(var, v);
                }
                self.gen_pattern(input, typeenv, termenv, &*subpat, vars);
            }
            &Pattern::Var(ty, var) => {
                // Assert that the value matches the existing bound var.
                let var_val = vars
                    .get(&var)
                    .cloned()
                    .expect("Variable should already be bound");
                let input_val = input
                    .to_value()
                    .expect("Cannot match an =var pattern against root term");
                self.add_match_equal(input_val, var_val, ty);
            }
            &Pattern::ConstInt(ty, value) => {
                // Assert that the value matches the constant integer.
                let input_val = input
                    .to_value()
                    .expect("Cannot match an integer pattern against root term");
                self.add_match_int(input_val, ty, value);
            }
            &Pattern::ConstPrim(ty, value) => {
                // Assert that the value matches the constant primitive.
                let input_val = input
                    .to_value()
                    .expect("Cannot match a constant-primitive pattern against root term");
                self.add_match_prim(input_val, ty, value);
            }
            &Pattern::Term(ty, term, ref args) => {
                match input {
                    ValueOrArgs::ImplicitTermFromArgs(termid) => {
                        // At the root, the term's fields are fetched directly
                        // as arguments rather than by destructuring a value.
                        assert_eq!(
                            termid, term,
                            "Cannot match a different term against root pattern"
                        );
                        let termdata = &termenv.terms[term.index()];
                        let arg_tys = &termdata.arg_tys[..];
                        for (i, subpat) in args.iter().enumerate() {
                            let value = self.add_arg(i, arg_tys[i]);
                            let subpat = match subpat {
                                &TermArgPattern::Expr(..) => {
                                    panic!("Should have been caught in typechecking")
                                }
                                &TermArgPattern::Pattern(ref pat) => pat,
                            };
                            self.gen_pattern(
                                ValueOrArgs::Value(value),
                                typeenv,
                                termenv,
                                subpat,
                                vars,
                            );
                        }
                    }
                    ValueOrArgs::Value(input) => {
                        // Determine whether the term has an external extractor or not.
                        let termdata = &termenv.terms[term.index()];
                        let arg_tys = &termdata.arg_tys[..];
                        match &termdata.kind {
                            TermKind::EnumVariant { variant } => {
                                let arg_values =
                                    self.add_match_variant(input, ty, arg_tys, *variant);
                                for (subpat, value) in args.iter().zip(arg_values.into_iter()) {
                                    let subpat = match subpat {
                                        &TermArgPattern::Pattern(ref pat) => pat,
                                        _ => unreachable!("Should have been caught by sema"),
                                    };
                                    self.gen_pattern(
                                        ValueOrArgs::Value(value),
                                        typeenv,
                                        termenv,
                                        subpat,
                                        vars,
                                    );
                                }
                            }
                            TermKind::Decl {
                                extractor_kind: None,
                                ..
                            } => {
                                panic!("Pattern invocation of undefined term body")
                            }
                            TermKind::Decl {
                                extractor_kind: Some(ExtractorKind::InternalExtractor { .. }),
                                ..
                            } => {
                                panic!("Should have been expanded away")
                            }
                            TermKind::Decl {
                                extractor_kind:
                                    Some(ExtractorKind::ExternalExtractor {
                                        ref arg_polarity,
                                        infallible,
                                        ..
                                    }),
                                ..
                            } => {
                                // Evaluate all `input` args. The extractor's
                                // first input is the matched value itself;
                                // each `in`-polarity arg is computed by an
                                // embedded expression sequence, while each
                                // `out`-polarity arg becomes a sub-pattern on
                                // one of the extractor's outputs.
                                let mut inputs = vec![];
                                let mut input_tys = vec![];
                                let mut output_tys = vec![];
                                let mut output_pats = vec![];
                                inputs.push(input);
                                input_tys.push(termdata.ret_ty);
                                for (arg, pol) in args.iter().zip(arg_polarity.iter()) {
                                    match pol {
                                        &ArgPolarity::Input => {
                                            let expr = match arg {
                                                &TermArgPattern::Expr(ref expr) => expr,
                                                _ => panic!(
                                                    "Should have been caught by typechecking"
                                                ),
                                            };
                                            let mut seq = ExprSequence::default();
                                            let value = seq.gen_expr(typeenv, termenv, expr, vars);
                                            seq.add_return(expr.ty(), value);
                                            let value = self.add_expr_seq(seq, value, expr.ty());
                                            inputs.push(value);
                                            input_tys.push(expr.ty());
                                        }
                                        &ArgPolarity::Output => {
                                            let pat = match arg {
                                                &TermArgPattern::Pattern(ref pat) => pat,
                                                _ => panic!(
                                                    "Should have been caught by typechecking"
                                                ),
                                            };
                                            output_tys.push(pat.ty());
                                            output_pats.push(pat);
                                        }
                                    }
                                }
                                // Invoke the extractor.
                                let arg_values = self.add_extract(
                                    inputs,
                                    input_tys,
                                    output_tys,
                                    term,
                                    *infallible,
                                );
                                // Match each output sub-pattern against the
                                // corresponding extractor output value.
                                for (pat, &val) in output_pats.iter().zip(arg_values.iter()) {
                                    self.gen_pattern(
                                        ValueOrArgs::Value(val),
                                        typeenv,
                                        termenv,
                                        pat,
                                        vars,
                                    );
                                }
                            }
                        }
                    }
                }
            }
            &Pattern::And(_ty, ref children) => {
                // All conjuncts match against the same input.
                for child in children {
                    self.gen_pattern(input, typeenv, termenv, child, vars);
                }
            }
            &Pattern::Wildcard(_ty) => {
                // Nothing!
            }
        }
    }
}
impl ExprSequence {
    /// Append `inst` to this sequence and return its id.
    fn add_inst(&mut self, inst: ExprInst) -> InstId {
        let id = InstId(self.insts.len());
        self.insts.push(inst);
        id
    }

    /// Add a constant-integer instruction and return its output value.
    fn add_const_int(&mut self, ty: TypeId, val: i64) -> Value {
        let inst = self.add_inst(ExprInst::ConstInt { ty, val });
        Value::Expr { inst, output: 0 }
    }

    /// Add a constant-primitive instruction and return its output value.
    fn add_const_prim(&mut self, ty: TypeId, val: Sym) -> Value {
        let inst = self.add_inst(ExprInst::ConstPrim { ty, val });
        Value::Expr { inst, output: 0 }
    }

    /// Add a variant-construction instruction; `inputs` must be in the
    /// variant's field order.
    fn add_create_variant(
        &mut self,
        inputs: &[(Value, TypeId)],
        ty: TypeId,
        variant: VariantId,
    ) -> Value {
        let inst = self.add_inst(ExprInst::CreateVariant {
            inputs: inputs.to_vec(),
            ty,
            variant,
        });
        Value::Expr { inst, output: 0 }
    }

    /// Add a constructor invocation for `term` and return its output value.
    fn add_construct(
        &mut self,
        inputs: &[(Value, TypeId)],
        ty: TypeId,
        term: TermId,
        infallible: bool,
    ) -> Value {
        let inst = self.add_inst(ExprInst::Construct {
            inputs: inputs.to_vec(),
            ty,
            term,
            infallible,
        });
        Value::Expr { inst, output: 0 }
    }

    /// Set the first (index-0) return value of this sequence.
    fn add_return(&mut self, ty: TypeId, value: Value) {
        self.add_inst(ExprInst::Return {
            index: 0,
            ty,
            value,
        });
    }

    /// Creates a sequence of ExprInsts to generate the given
    /// expression value, returning the root value.
    fn gen_expr(
        &mut self,
        typeenv: &TypeEnv,
        termenv: &TermEnv,
        expr: &Expr,
        vars: &BTreeMap<VarId, Value>,
    ) -> Value {
        log::trace!("gen_expr: expr {:?}", expr);
        match expr {
            &Expr::ConstInt(ty, val) => self.add_const_int(ty, val),
            &Expr::ConstPrim(ty, val) => self.add_const_prim(ty, val),
            &Expr::Let {
                ty: _ty,
                ref bindings,
                ref body,
            } => {
                // Each binding sees the variables bound by earlier bindings,
                // so extend a local copy of the environment as we go.
                let mut vars = vars.clone();
                for &(var, _var_ty, ref var_expr) in bindings {
                    let var_value = self.gen_expr(typeenv, termenv, &*var_expr, &vars);
                    vars.insert(var, var_value);
                }
                self.gen_expr(typeenv, termenv, body, &vars)
            }
            &Expr::Var(_ty, var_id) => vars.get(&var_id).cloned().unwrap(),
            &Expr::Term(ty, term, ref arg_exprs) => {
                let termdata = &termenv.terms[term.index()];
                // Evaluate arguments in order, pairing each with its
                // declared type.
                let mut arg_values_tys = vec![];
                for (arg_ty, arg_expr) in termdata.arg_tys.iter().cloned().zip(arg_exprs.iter()) {
                    arg_values_tys
                        .push((self.gen_expr(typeenv, termenv, &*arg_expr, vars), arg_ty));
                }
                match &termdata.kind {
                    TermKind::EnumVariant { variant } => {
                        self.add_create_variant(&arg_values_tys[..], ty, *variant)
                    }
                    TermKind::Decl {
                        constructor_kind: Some(ConstructorKind::InternalConstructor),
                        ..
                    } => self.add_construct(
                        &arg_values_tys[..],
                        ty,
                        term,
                        /* infallible = */ false,
                    ),
                    TermKind::Decl {
                        constructor_kind: Some(ConstructorKind::ExternalConstructor { .. }),
                        ..
                    } => self.add_construct(
                        &arg_values_tys[..],
                        ty,
                        term,
                        /* infallible = */ true,
                    ),
                    TermKind::Decl {
                        constructor_kind: None,
                        ..
                    } => panic!("Should have been caught by typechecking"),
                }
            }
        }
    }
}
/// Build a (pattern, expression) sequence pair from a rule.
///
/// The pattern sequence matches and destructures the rule's left-hand side,
/// binding pattern variables; the expression sequence then consumes those
/// bindings to build the right-hand side and return it.
pub fn lower_rule(
    tyenv: &TypeEnv,
    termenv: &TermEnv,
    rule: RuleId,
) -> (PatternSequence, ExprSequence) {
    let ruledata = &termenv.rules[rule.index()];
    log::trace!("lower_rule: ruledata {:?}", ruledata);
    let mut pattern_seq: PatternSequence = Default::default();
    let mut expr_seq: ExprSequence = Default::default();
    // Remember the rule's source position for later diagnostics/ordering.
    expr_seq.pos = ruledata.pos;
    let mut vars = BTreeMap::new();
    let root_term = ruledata
        .lhs
        .root_term()
        .expect("Pattern must have a term at the root");
    // Lower the pattern, starting from the root input value.
    pattern_seq.gen_pattern(
        ValueOrArgs::ImplicitTermFromArgs(root_term),
        tyenv,
        termenv,
        &ruledata.lhs,
        &mut vars,
    );
    // Lower the expression, making use of the bound variables from the
    // pattern.
    let rhs_root_val = expr_seq.gen_expr(tyenv, termenv, &ruledata.rhs, &vars);
    // Return the root RHS value.
    let output_ty = ruledata.rhs.ty();
    expr_seq.add_return(output_ty, rhs_root_val);
    (pattern_seq, expr_seq)
}

View File

@@ -0,0 +1,405 @@
//! Lexer for the ISLE language.
use crate::error::{Error, Result, Source};
use std::borrow::Cow;
use std::path::Path;
use std::sync::Arc;
/// The lexer.
///
/// Breaks source text up into a sequence of tokens (with source positions).
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
    /// Arena of filenames from the input source.
    ///
    /// Indexed via `Pos::file`.
    pub filenames: Vec<Arc<str>>,
    /// Arena of file source texts.
    ///
    /// Indexed via `Pos::file`.
    pub file_texts: Vec<Arc<str>>,
    /// Byte offset within `buf` at which each file's text begins (parallel to
    /// `filenames`/`file_texts`).
    file_starts: Vec<usize>,
    /// The source bytes being lexed; for `from_files` this is all files
    /// concatenated with a `'\n'` appended after each.
    buf: Cow<'a, [u8]>,
    /// Current lexing position (its `offset` is absolute within `buf`).
    pos: Pos,
    /// One-token lookahead, kept filled by `reload`.
    lookahead: Option<(Pos, Token)>,
}
/// A source position: file, byte offset, and 1-based line / 0-based column.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash, PartialOrd, Ord)]
pub struct Pos {
    /// This source position's file.
    ///
    /// Indexes into `Lexer::filenames` early in the compiler pipeline, and
    /// later into `TypeEnv::filenames` once we get into semantic analysis.
    pub file: usize,
    /// This source position's byte offset in the file.
    pub offset: usize,
    /// This source position's line number in the file.
    pub line: usize,
    /// This source position's column number in the file.
    pub col: usize,
}
impl Pos {
    /// Render this position in `file.isle:12:34` form.
    pub fn pretty_print(&self, filenames: &[Arc<str>]) -> String {
        let name = &filenames[self.file];
        format!("{}:{}:{}", name, self.line, self.col)
    }
    /// Render this position in `file.isle line 12` form.
    pub fn pretty_print_line(&self, filenames: &[Arc<str>]) -> String {
        let name = &filenames[self.file];
        format!("{} line {}", name, self.line)
    }
}
/// A token of ISLE source.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
    /// Left paren.
    LParen,
    /// Right paren.
    RParen,
    /// A symbol, e.g. `Foo`.
    ///
    /// Per `Lexer::next_token`, symbols may also contain most punctuation
    /// (e.g. `=>`, `+`, `_test!` all lex as symbols).
    Symbol(String),
    /// An integer.
    ///
    /// Lexed from a decimal or `0x` hexadecimal literal, optionally negated.
    Int(i64),
    /// `@`
    At,
    /// `<`
    Lt,
}
impl<'a> Lexer<'a> {
    /// Create a new lexer for the given source contents and filename.
    pub fn from_str(s: &'a str, filename: &'a str) -> Result<Lexer<'a>> {
        let mut l = Lexer {
            filenames: vec![filename.into()],
            file_texts: vec![s.into()],
            file_starts: vec![0],
            buf: Cow::Borrowed(s.as_bytes()),
            pos: Pos {
                file: 0,
                offset: 0,
                line: 1,
                col: 0,
            },
            lookahead: None,
        };
        l.reload()?;
        Ok(l)
    }

    /// Create a new lexer from the given files.
    pub fn from_files<P>(file_paths: impl IntoIterator<Item = P>) -> Result<Lexer<'a>>
    where
        P: AsRef<Path>,
    {
        let mut filenames = Vec::<Arc<str>>::new();
        let mut file_texts = Vec::<Arc<str>>::new();
        for f in file_paths {
            let f = f.as_ref();
            filenames.push(f.display().to_string().into());
            let s = std::fs::read_to_string(f)
                .map_err(|e| Error::from_io(e, format!("failed to read file: {}", f.display())))?;
            file_texts.push(s.into());
        }
        assert!(!filenames.is_empty());

        // Concatenate all files into one buffer, appending a newline after
        // each so a token cannot span a file boundary; `file_starts` records
        // where each file begins within the combined buffer.
        let mut file_starts = vec![];
        let mut buf = String::new();
        for text in &file_texts {
            file_starts.push(buf.len());
            buf += &text;
            buf += "\n";
        }

        let mut l = Lexer {
            filenames,
            file_texts,
            buf: Cow::Owned(buf.into_bytes()),
            file_starts,
            pos: Pos {
                file: 0,
                offset: 0,
                line: 1,
                col: 0,
            },
            lookahead: None,
        };
        l.reload()?;
        Ok(l)
    }

    /// Get the lexer's current source position.
    pub fn pos(&self) -> Pos {
        Pos {
            file: self.pos.file,
            // `self.pos.offset` indexes the concatenated buffer; report it
            // relative to the start of the current file.
            offset: self.pos.offset - self.file_starts[self.pos.file],
            line: self.pos.line,
            col: self.pos.col,
        }
    }

    /// Advance one byte, updating line/column and crossing into the next
    /// file when we reach its recorded start offset.
    fn advance_pos(&mut self) {
        self.pos.col += 1;
        if self.buf[self.pos.offset] == b'\n' {
            self.pos.line += 1;
            self.pos.col = 0;
        }
        self.pos.offset += 1;
        if self.pos.file + 1 < self.file_starts.len() {
            let next_start = self.file_starts[self.pos.file + 1];
            if self.pos.offset >= next_start {
                assert!(self.pos.offset == next_start);
                self.pos.file += 1;
                self.pos.line = 1;
            }
        }
    }

    /// Construct a parse error at `pos` with the given message.
    fn error(&self, pos: Pos, msg: impl Into<String>) -> Error {
        Error::ParseError {
            msg: msg.into(),
            src: Source::new(
                self.filenames[pos.file].clone(),
                self.file_texts[pos.file].clone(),
            ),
            // Use the caller-supplied position (e.g. the start of the
            // offending token) for the span, matching `Parser::error`.
            span: miette::SourceSpan::from((pos.offset, 1)),
        }
    }

    /// Scan and return the next token, or `None` at end of input.
    fn next_token(&mut self) -> Result<Option<(Pos, Token)>> {
        fn is_sym_first_char(c: u8) -> bool {
            match c {
                b'-' | b'0'..=b'9' | b'(' | b')' | b';' => false,
                c if c.is_ascii_whitespace() => false,
                _ => true,
            }
        }
        fn is_sym_other_char(c: u8) -> bool {
            match c {
                b'(' | b')' | b';' | b'@' | b'<' => false,
                c if c.is_ascii_whitespace() => false,
                _ => true,
            }
        }

        // Skip any whitespace and any comments (`;` to end of line).
        while self.pos.offset < self.buf.len() {
            if self.buf[self.pos.offset].is_ascii_whitespace() {
                self.advance_pos();
                continue;
            }
            if self.buf[self.pos.offset] == b';' {
                while self.pos.offset < self.buf.len() && self.buf[self.pos.offset] != b'\n' {
                    self.advance_pos();
                }
                continue;
            }
            break;
        }

        if self.pos.offset == self.buf.len() {
            return Ok(None);
        }

        let char_pos = self.pos();
        match self.buf[self.pos.offset] {
            b'(' => {
                self.advance_pos();
                Ok(Some((char_pos, Token::LParen)))
            }
            b')' => {
                self.advance_pos();
                Ok(Some((char_pos, Token::RParen)))
            }
            b'@' => {
                self.advance_pos();
                Ok(Some((char_pos, Token::At)))
            }
            b'<' => {
                self.advance_pos();
                Ok(Some((char_pos, Token::Lt)))
            }
            c if is_sym_first_char(c) => {
                let start = self.pos.offset;
                let start_pos = self.pos();
                while self.pos.offset < self.buf.len()
                    && is_sym_other_char(self.buf[self.pos.offset])
                {
                    self.advance_pos();
                }
                let end = self.pos.offset;
                let s = std::str::from_utf8(&self.buf[start..end])
                    .expect("Only ASCII characters, should be UTF-8");
                debug_assert!(!s.is_empty());
                Ok(Some((start_pos, Token::Symbol(s.to_string()))))
            }
            c if (c >= b'0' && c <= b'9') || c == b'-' => {
                let start_pos = self.pos();
                let neg = if c == b'-' {
                    self.advance_pos();
                    true
                } else {
                    false
                };
                let mut radix = 10;

                // Check for hex literals.
                if self.buf.get(self.pos.offset).copied() == Some(b'0')
                    && self.buf.get(self.pos.offset + 1).copied() == Some(b'x')
                {
                    self.advance_pos();
                    self.advance_pos();
                    radix = 16;
                }

                // Find the range in the buffer for this integer literal. We'll
                // pass this range to `i64::from_str_radix` to do the actual
                // string-to-integer conversion.
                let start_offset = self.pos.offset;
                while self.pos.offset < self.buf.len()
                    && ((radix == 10
                        && self.buf[self.pos.offset] >= b'0'
                        && self.buf[self.pos.offset] <= b'9')
                        || (radix == 16
                            && ((self.buf[self.pos.offset] >= b'0'
                                && self.buf[self.pos.offset] <= b'9')
                                || (self.buf[self.pos.offset] >= b'a'
                                    && self.buf[self.pos.offset] <= b'f')
                                || (self.buf[self.pos.offset] >= b'A'
                                    && self.buf[self.pos.offset] <= b'F'))))
                {
                    self.advance_pos();
                }
                let end_offset = self.pos.offset;

                let num = i64::from_str_radix(
                    std::str::from_utf8(&self.buf[start_offset..end_offset]).unwrap(),
                    radix,
                )
                .map_err(|e| self.error(start_pos, e.to_string()))?;

                let tok = if neg {
                    Token::Int(num.checked_neg().ok_or_else(|| {
                        self.error(start_pos, "integer literal cannot fit in i64")
                    })?)
                } else {
                    Token::Int(num)
                };
                Ok(Some((start_pos, tok)))
            }
            // Display the byte as a character, not its numeric value.
            c => panic!(
                "Unexpected character '{}' at offset {}",
                c as char, self.pos.offset
            ),
        }
    }

    /// Get the next token from this lexer's token stream, if any.
    pub fn next(&mut self) -> Result<Option<(Pos, Token)>> {
        let tok = self.lookahead.take();
        self.reload()?;
        Ok(tok)
    }

    /// Refill the one-token lookahead if it is empty.
    fn reload(&mut self) -> Result<()> {
        if self.lookahead.is_none() && self.pos.offset < self.buf.len() {
            self.lookahead = self.next_token()?;
        }
        Ok(())
    }

    /// Peek ahead at the next token.
    pub fn peek(&self) -> Option<&(Pos, Token)> {
        self.lookahead.as_ref()
    }

    /// Are we at the end of the source input?
    pub fn eof(&self) -> bool {
        self.lookahead.is_none()
    }
}
impl Token {
    /// Is this an `Int` token?
    pub fn is_int(&self) -> bool {
        matches!(self, Token::Int(_))
    }

    /// Is this a `Symbol` token?
    pub fn is_sym(&self) -> bool {
        matches!(self, Token::Symbol(_))
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Lex `s` to completion and collect just the tokens (positions dropped).
    fn lex(s: &str, file: &str) -> Vec<Token> {
        let mut toks = vec![];
        let mut lexer = Lexer::from_str(s, file).unwrap();
        while let Some((_, tok)) = lexer.next().unwrap() {
            toks.push(tok);
        }
        toks
    }

    #[test]
    fn lexer_basic() {
        // Comments (both `;;` and `;`), mixed whitespace, symbols, and
        // positive/negative integers.
        assert_eq!(
            lex(
                ";; comment\n; another\r\n \t(one two three 23 -568 )\n",
                "lexer_basic"
            ),
            vec![
                Token::LParen,
                Token::Symbol("one".to_string()),
                Token::Symbol("two".to_string()),
                Token::Symbol("three".to_string()),
                Token::Int(23),
                Token::Int(-568),
                Token::RParen
            ]
        );
    }

    #[test]
    fn ends_with_sym() {
        // A symbol terminated by EOF rather than whitespace.
        assert_eq!(
            lex("asdf", "ends_with_sym"),
            vec![Token::Symbol("asdf".to_string()),]
        );
    }

    #[test]
    fn ends_with_num() {
        // An integer terminated by EOF rather than whitespace.
        assert_eq!(lex("23", "ends_with_num"), vec![Token::Int(23)],);
    }

    #[test]
    fn weird_syms() {
        // Punctuation-heavy identifiers all lex as symbols.
        assert_eq!(
            lex("(+ [] => !! _test!;comment\n)", "weird_syms"),
            vec![
                Token::LParen,
                Token::Symbol("+".to_string()),
                Token::Symbol("[]".to_string()),
                Token::Symbol("=>".to_string()),
                Token::Symbol("!!".to_string()),
                Token::Symbol("_test!".to_string()),
                Token::RParen,
            ]
        );
    }
}

View File

@@ -0,0 +1,29 @@
#![doc = include_str!("../README.md")]
#![deny(missing_docs)]
/// Generate a newtype wrapper around `usize` used as a typed index
/// (e.g. `TermId`, `TypeId`), with the standard derives and an `index()`
/// accessor.
macro_rules! declare_id {
    (
        $(#[$attr:meta])*
        $name:ident
    ) => {
        $(#[$attr])*
        #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct $name(pub usize);
        impl $name {
            /// Get the index of this id.
            pub fn index(self) -> usize {
                self.0
            }
        }
    };
}
pub mod ast;
pub mod codegen;
pub mod compile;
pub mod error;
pub mod ir;
pub mod lexer;
pub mod parser;
pub mod sema;
pub mod trie;

View File

@@ -0,0 +1,504 @@
//! Parser for ISLE language.
use crate::ast::*;
use crate::error::*;
use crate::lexer::{Lexer, Pos, Token};
/// Parse the top-level ISLE definitions and return their AST.
pub fn parse(lexer: Lexer) -> Result<Defs> {
    Parser::new(lexer).parse_defs()
}
/// The ISLE parser.
///
/// Takes in a lexer and creates an AST.
#[derive(Clone, Debug)]
struct Parser<'a> {
    /// Token stream (with one-token lookahead) that the parser consumes.
    lexer: Lexer<'a>,
}
impl<'a> Parser<'a> {
    /// Construct a new parser from the given lexer.
    pub fn new(lexer: Lexer<'a>) -> Parser<'a> {
        Parser { lexer }
    }

    /// Build a parse error at `pos` with the given message.
    fn error(&self, pos: Pos, msg: String) -> Error {
        Error::ParseError {
            msg,
            src: Source::new(
                self.lexer.filenames[pos.file].clone(),
                self.lexer.file_texts[pos.file].clone(),
            ),
            span: miette::SourceSpan::from((pos.offset, 1)),
        }
    }

    /// Consume and return the next token if it satisfies `f`; error
    /// otherwise (or at EOF).
    fn take<F: Fn(&Token) -> bool>(&mut self, f: F) -> Result<Token> {
        if let Some(&(pos, ref peek)) = self.lexer.peek() {
            if !f(peek) {
                return Err(self.error(pos, format!("Unexpected token {:?}", peek)));
            }
            Ok(self.lexer.next()?.unwrap().1)
        } else {
            Err(self.error(self.lexer.pos(), "Unexpected EOF".to_string()))
        }
    }

    /// Does the next token (if any) satisfy `f`? Does not consume.
    fn is<F: Fn(&Token) -> bool>(&self, f: F) -> bool {
        if let Some(&(_, ref peek)) = self.lexer.peek() {
            f(peek)
        } else {
            false
        }
    }

    /// Position of the next token, or the lexer's current position at EOF.
    fn pos(&self) -> Pos {
        self.lexer
            .peek()
            .map_or_else(|| self.lexer.pos(), |(pos, _)| *pos)
    }

    // --- Lookahead predicates (non-consuming). ---

    fn is_lparen(&self) -> bool {
        self.is(|tok| *tok == Token::LParen)
    }
    fn is_rparen(&self) -> bool {
        self.is(|tok| *tok == Token::RParen)
    }
    fn is_at(&self) -> bool {
        self.is(|tok| *tok == Token::At)
    }
    fn is_lt(&self) -> bool {
        self.is(|tok| *tok == Token::Lt)
    }
    fn is_sym(&self) -> bool {
        self.is(|tok| tok.is_sym())
    }
    fn is_int(&self) -> bool {
        self.is(|tok| tok.is_int())
    }
    /// Is the next token exactly the symbol `s`?
    fn is_sym_str(&self, s: &str) -> bool {
        self.is(|tok| match tok {
            &Token::Symbol(ref tok_s) if tok_s == s => true,
            _ => false,
        })
    }
    /// Is the next token a `$`-prefixed constant symbol?
    fn is_const(&self) -> bool {
        self.is(|tok| match tok {
            &Token::Symbol(ref tok_s) if tok_s.starts_with("$") => true,
            _ => false,
        })
    }

    // --- Single-token consumers. ---

    fn lparen(&mut self) -> Result<()> {
        self.take(|tok| *tok == Token::LParen).map(|_| ())
    }
    fn rparen(&mut self) -> Result<()> {
        self.take(|tok| *tok == Token::RParen).map(|_| ())
    }
    fn at(&mut self) -> Result<()> {
        self.take(|tok| *tok == Token::At).map(|_| ())
    }
    fn lt(&mut self) -> Result<()> {
        self.take(|tok| *tok == Token::Lt).map(|_| ())
    }
    /// Consume a symbol token and return its text.
    fn symbol(&mut self) -> Result<String> {
        match self.take(|tok| tok.is_sym())? {
            Token::Symbol(s) => Ok(s),
            _ => unreachable!(),
        }
    }
    /// Consume an integer token and return its value.
    fn int(&mut self) -> Result<i64> {
        match self.take(|tok| tok.is_int())? {
            Token::Int(i) => Ok(i),
            _ => unreachable!(),
        }
    }

    /// Parse every top-level definition until EOF, consuming the parser and
    /// moving the lexer's filename/text arenas into the returned `Defs`.
    fn parse_defs(mut self) -> Result<Defs> {
        let mut defs = vec![];
        while !self.lexer.eof() {
            defs.push(self.parse_def()?);
        }
        Ok(Defs {
            defs,
            filenames: self.lexer.filenames,
            file_texts: self.lexer.file_texts,
        })
    }

    /// Parse one parenthesized top-level form, dispatching on its keyword.
    fn parse_def(&mut self) -> Result<Def> {
        self.lparen()?;
        let pos = self.pos();
        let def = match &self.symbol()?[..] {
            "type" => Def::Type(self.parse_type()?),
            "decl" => Def::Decl(self.parse_decl()?),
            "rule" => Def::Rule(self.parse_rule()?),
            "extractor" => Def::Extractor(self.parse_etor()?),
            "extern" => Def::Extern(self.parse_extern()?),
            s => {
                return Err(self.error(pos, format!("Unexpected identifier: {}", s)));
            }
        };
        self.rparen()?;
        Ok(def)
    }

    /// Validate `s` as an identifier (letter/`_`/`$` first, then
    /// alphanumeric/`_`/`.`/`$`) and wrap it with its position.
    fn str_to_ident(&self, pos: Pos, s: &str) -> Result<Ident> {
        let first = s
            .chars()
            .next()
            .ok_or_else(|| self.error(pos, "empty symbol".into()))?;
        if !first.is_alphabetic() && first != '_' && first != '$' {
            return Err(self.error(
                pos,
                format!("Identifier '{}' does not start with letter or _ or $", s),
            ));
        }
        if s.chars()
            .skip(1)
            .any(|c| !c.is_alphanumeric() && c != '_' && c != '.' && c != '$')
        {
            return Err(self.error(
                pos,
                format!(
                    "Identifier '{}' contains invalid character (not a-z, A-Z, 0-9, _, ., $)",
                    s
                ),
            ));
        }
        Ok(Ident(s.to_string(), pos))
    }

    /// Consume a symbol and validate it as an identifier.
    fn parse_ident(&mut self) -> Result<Ident> {
        let pos = self.pos();
        let s = self.symbol()?;
        self.str_to_ident(pos, &s)
    }

    /// Consume a `$name` constant identifier, stripping the `$`.
    fn parse_const(&mut self) -> Result<Ident> {
        let pos = self.pos();
        let ident = self.parse_ident()?;
        if ident.0.starts_with("$") {
            let s = &ident.0[1..];
            Ok(Ident(s.to_string(), ident.1))
        } else {
            Err(self.error(
                pos,
                "Not a constant identifier; must start with a '$'".to_string(),
            ))
        }
    }

    /// Parse `(type NAME [extern] TYPEVALUE)` (keyword already consumed).
    fn parse_type(&mut self) -> Result<Type> {
        let pos = self.pos();
        let name = self.parse_ident()?;
        let mut is_extern = false;
        if self.is_sym_str("extern") {
            self.symbol()?;
            is_extern = true;
        }
        let ty = self.parse_typevalue()?;
        Ok(Type {
            name,
            is_extern,
            ty,
            pos,
        })
    }

    /// Parse `(primitive NAME)` or `(enum VARIANT...)`.
    fn parse_typevalue(&mut self) -> Result<TypeValue> {
        let pos = self.pos();
        self.lparen()?;
        if self.is_sym_str("primitive") {
            self.symbol()?;
            let primitive_ident = self.parse_ident()?;
            self.rparen()?;
            Ok(TypeValue::Primitive(primitive_ident, pos))
        } else if self.is_sym_str("enum") {
            self.symbol()?;
            let mut variants = vec![];
            while !self.is_rparen() {
                let variant = self.parse_type_variant()?;
                variants.push(variant);
            }
            self.rparen()?;
            Ok(TypeValue::Enum(variants, pos))
        } else {
            Err(self.error(pos, "Unknown type definition".to_string()))
        }
    }

    /// Parse a variant: a bare `Name` (no fields) or `(Name FIELD...)`.
    fn parse_type_variant(&mut self) -> Result<Variant> {
        if self.is_sym() {
            let pos = self.pos();
            let name = self.parse_ident()?;
            Ok(Variant {
                name,
                fields: vec![],
                pos,
            })
        } else {
            let pos = self.pos();
            self.lparen()?;
            let name = self.parse_ident()?;
            let mut fields = vec![];
            while !self.is_rparen() {
                fields.push(self.parse_type_field()?);
            }
            self.rparen()?;
            Ok(Variant { name, fields, pos })
        }
    }

    /// Parse one `(name type)` field of an enum variant.
    fn parse_type_field(&mut self) -> Result<Field> {
        let pos = self.pos();
        self.lparen()?;
        let name = self.parse_ident()?;
        let ty = self.parse_ident()?;
        self.rparen()?;
        Ok(Field { name, ty, pos })
    }

    /// Parse `(decl TERM (ARGTY...) RETTY)` (keyword already consumed).
    fn parse_decl(&mut self) -> Result<Decl> {
        let pos = self.pos();
        let term = self.parse_ident()?;
        self.lparen()?;
        let mut arg_tys = vec![];
        while !self.is_rparen() {
            arg_tys.push(self.parse_ident()?);
        }
        self.rparen()?;
        let ret_ty = self.parse_ident()?;
        Ok(Decl {
            term,
            arg_tys,
            ret_ty,
            pos,
        })
    }

    /// Parse an `extern` body: `constructor TERM FUNC`,
    /// `extractor [infallible] TERM FUNC [(in|out ...)]`, or
    /// `const $NAME TYPE`.
    fn parse_extern(&mut self) -> Result<Extern> {
        let pos = self.pos();
        if self.is_sym_str("constructor") {
            self.symbol()?;
            let term = self.parse_ident()?;
            let func = self.parse_ident()?;
            Ok(Extern::Constructor { term, func, pos })
        } else if self.is_sym_str("extractor") {
            self.symbol()?;
            let infallible = if self.is_sym_str("infallible") {
                self.symbol()?;
                true
            } else {
                false
            };
            let term = self.parse_ident()?;
            let func = self.parse_ident()?;
            // Optional per-argument polarity list, e.g. `(in out out)`.
            let arg_polarity = if self.is_lparen() {
                let mut pol = vec![];
                self.lparen()?;
                while !self.is_rparen() {
                    if self.is_sym_str("in") {
                        self.symbol()?;
                        pol.push(ArgPolarity::Input);
                    } else if self.is_sym_str("out") {
                        self.symbol()?;
                        pol.push(ArgPolarity::Output);
                    } else {
                        // NOTE(review): this reports the position of the
                        // `extern` form rather than of the offending token —
                        // confirm whether that is intended.
                        return Err(self.error(pos, "Invalid argument polarity".to_string()));
                    }
                }
                self.rparen()?;
                Some(pol)
            } else {
                None
            };
            Ok(Extern::Extractor {
                term,
                func,
                pos,
                arg_polarity,
                infallible,
            })
        } else if self.is_sym_str("const") {
            self.symbol()?;
            let pos = self.pos();
            let name = self.parse_const()?;
            let ty = self.parse_ident()?;
            Ok(Extern::Const { name, ty, pos })
        } else {
            Err(self.error(
                pos,
                "Invalid extern: must be (extern constructor ...) or (extern extractor ...)"
                    .to_string(),
            ))
        }
    }

    /// Parse `(extractor (TERM ARG...) TEMPLATE)` (keyword already consumed).
    fn parse_etor(&mut self) -> Result<Extractor> {
        let pos = self.pos();
        self.lparen()?;
        let term = self.parse_ident()?;
        let mut args = vec![];
        while !self.is_rparen() {
            args.push(self.parse_ident()?);
        }
        self.rparen()?;
        let template = self.parse_pattern()?;
        Ok(Extractor {
            term,
            args,
            template,
            pos,
        })
    }

    /// Parse `(rule [PRIO] PATTERN EXPR)` (keyword already consumed).
    fn parse_rule(&mut self) -> Result<Rule> {
        let pos = self.pos();
        // Optional integer priority precedes the pattern.
        let prio = if self.is_int() {
            Some(self.int()?)
        } else {
            None
        };
        let pattern = self.parse_pattern()?;
        let expr = self.parse_expr()?;
        Ok(Rule {
            pattern,
            expr,
            pos,
            prio,
        })
    }

    /// Parse a left-hand-side pattern: integer/`$const` literals, `_`
    /// wildcard, `=var` equality, `var[@subpat]` bindings, `(and ...)`, or a
    /// term application.
    fn parse_pattern(&mut self) -> Result<Pattern> {
        let pos = self.pos();
        if self.is_int() {
            Ok(Pattern::ConstInt {
                val: self.int()?,
                pos,
            })
        } else if self.is_const() {
            let val = self.parse_const()?;
            Ok(Pattern::ConstPrim { val, pos })
        } else if self.is_sym_str("_") {
            self.symbol()?;
            Ok(Pattern::Wildcard { pos })
        } else if self.is_sym() {
            let s = self.symbol()?;
            if s.starts_with("=") {
                // `=var`: must equal an already-bound variable.
                let s = &s[1..];
                let var = self.str_to_ident(pos, s)?;
                Ok(Pattern::Var { var, pos })
            } else {
                // `var@subpat`, or bare `var` (sugar for `var@_`).
                let var = self.str_to_ident(pos, &s)?;
                if self.is_at() {
                    self.at()?;
                    let subpat = Box::new(self.parse_pattern()?);
                    Ok(Pattern::BindPattern { var, subpat, pos })
                } else {
                    Ok(Pattern::BindPattern {
                        var,
                        subpat: Box::new(Pattern::Wildcard { pos }),
                        pos,
                    })
                }
            }
        } else if self.is_lparen() {
            self.lparen()?;
            if self.is_sym_str("and") {
                self.symbol()?;
                let mut subpats = vec![];
                while !self.is_rparen() {
                    subpats.push(self.parse_pattern()?);
                }
                self.rparen()?;
                Ok(Pattern::And { subpats, pos })
            } else {
                let sym = self.parse_ident()?;
                let mut args = vec![];
                while !self.is_rparen() {
                    args.push(self.parse_pattern_term_arg()?);
                }
                self.rparen()?;
                Ok(Pattern::Term { sym, args, pos })
            }
        } else {
            Err(self.error(pos, "Unexpected pattern".into()))
        }
    }

    /// Parse a term argument: `<expr` (an input expression) or a sub-pattern.
    fn parse_pattern_term_arg(&mut self) -> Result<TermArgPattern> {
        if self.is_lt() {
            self.lt()?;
            Ok(TermArgPattern::Expr(self.parse_expr()?))
        } else {
            Ok(TermArgPattern::Pattern(self.parse_pattern()?))
        }
    }

    /// Parse a right-hand-side expression: `(let (DEFS...) BODY)`, a term
    /// application, `#t`/`#f` (as integers 1/0), `$const`, a variable, or an
    /// integer literal.
    fn parse_expr(&mut self) -> Result<Expr> {
        let pos = self.pos();
        if self.is_lparen() {
            self.lparen()?;
            if self.is_sym_str("let") {
                self.symbol()?;
                self.lparen()?;
                let mut defs = vec![];
                while !self.is_rparen() {
                    let def = self.parse_letdef()?;
                    defs.push(def);
                }
                self.rparen()?;
                let body = Box::new(self.parse_expr()?);
                self.rparen()?;
                Ok(Expr::Let { defs, body, pos })
            } else {
                let sym = self.parse_ident()?;
                let mut args = vec![];
                while !self.is_rparen() {
                    args.push(self.parse_expr()?);
                }
                self.rparen()?;
                Ok(Expr::Term { sym, args, pos })
            }
        } else if self.is_sym_str("#t") {
            self.symbol()?;
            Ok(Expr::ConstInt { val: 1, pos })
        } else if self.is_sym_str("#f") {
            self.symbol()?;
            Ok(Expr::ConstInt { val: 0, pos })
        } else if self.is_const() {
            let val = self.parse_const()?;
            Ok(Expr::ConstPrim { val, pos })
        } else if self.is_sym() {
            let name = self.parse_ident()?;
            Ok(Expr::Var { name, pos })
        } else if self.is_int() {
            let val = self.int()?;
            Ok(Expr::ConstInt { val, pos })
        } else {
            Err(self.error(pos, "Invalid expression".into()))
        }
    }

    /// Parse one `(var type expr)` binding of a `let`.
    fn parse_letdef(&mut self) -> Result<LetDef> {
        let pos = self.pos();
        self.lparen()?;
        let var = self.parse_ident()?;
        let ty = self.parse_ident()?;
        let val = Box::new(self.parse_expr()?);
        self.rparen()?;
        Ok(LetDef { var, ty, val, pos })
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,587 @@
//! Trie construction.
use crate::ir::{lower_rule, ExprSequence, PatternInst, PatternSequence};
use crate::sema::{RuleId, TermEnv, TermId, TypeEnv};
use std::collections::BTreeMap;
/// Construct the decision trie for every term, keyed by term id.
pub fn build_tries(typeenv: &TypeEnv, termenv: &TermEnv) -> BTreeMap<TermId, TrieNode> {
    let mut b = TermFunctionsBuilder::new(typeenv, termenv);
    b.build();
    log::trace!("builder: {:?}", b);
    b.finalize()
}
/// One "input symbol" for the decision tree that handles matching on
/// a term. Each symbol represents one step: we either run a match op,
/// or we finish the match.
///
/// Note that in the original Peepmatic scheme, the input-symbol to
/// the FSM was specified slightly differently. The automaton
/// responded to alphabet symbols that corresponded only to match
/// results, and the "extra state" was used at each automaton node to
/// represent the op to run next. This extra state differentiated
/// nodes that would otherwise be merged together by
/// deduplication. That scheme works well enough, but the "extra
/// state" is slightly confusing and diverges slightly from a pure
/// automaton.
///
/// Instead, here, we imagine that the user of the automaton/trie can
/// query the possible transition edges out of the current state. Each
/// of these edges corresponds to one possible match op to run. After
/// running a match op, we reach a new state corresponding to
/// successful matches up to that point.
///
/// However, it's a bit more subtle than this. Consider the
/// prioritization problem. We want to give the DSL user the ability
/// to change the order in which rules apply, for example to have a
/// tier of "fallback rules" that apply only if more custom rules do
/// not match.
///
/// A somewhat simplistic answer to this problem is "more specific
/// rule wins". However, this implies the existence of a total
/// ordering of linearized match sequences that may not fully capture
/// the intuitive meaning of "more specific". Consider three left-hand
/// sides:
///
/// - (A _ _)
/// - (A (B _) _)
/// - (A _ (B _))
///
/// Intuitively, the first is the least specific. Given the input `(A
/// (B 1) (B 2))`, we can say for sure that the first should not be
/// chosen, because either the second or third would match "more" of
/// the input tree. But which of the second and third should be
/// chosen? A "lexicographic ordering" rule would say that we sort
/// left-hand sides such that the `(B _)` sub-pattern comes before the
/// wildcard `_`, so the second rule wins. But that is arbitrarily
/// privileging one over the other based on the order of the
/// arguments.
///
/// Instead, we can accept explicit priorities from the user to allow
/// either choice. So we need a data structure that can associate
/// matching inputs *with priorities* to outputs.
///
/// Next, we build a decision tree rather than an FSM. Why? Because
/// we're compiling to a structured language, Rust, and states become
/// *program points* rather than *data*, we cannot easily support a
/// DAG structure. In other words, we are not producing a FSM that we
/// can interpret at runtime; rather we are compiling code in which
/// each state corresponds to a sequence of statements and
/// control-flow that branches to a next state; we naturally need
/// nesting; we cannot codegen arbitrary state transitions in an
/// efficient manner. We could support a limited form of DAG that
/// reifies "diamonds" (two alternate paths that reconverge), but
/// supporting this in a way that lets the output refer to values from
/// either side is very complex (we need to invent phi-nodes), and the
/// cases where we want to do this rather than invoke a sub-term (that
/// is compiled to a separate function) are rare. Finally, note that
/// one reason to deduplicate nodes and turn a tree back into a DAG --
/// "output-suffix sharing" as some other instruction-rewriter
/// engines, such as Peepmatic, do -- is not done, because all
/// "output" occurs at leaf nodes; this is necessary because we do not
/// want to start invoking external constructors until we are sure of
/// the match. Some of the code-sharing advantages of the "suffix
/// sharing" scheme can be obtained in a more flexible and
/// user-controllable way (with less understanding of internal
/// compiler logic needed) by factoring logic into different internal
/// terms, which become different compiled functions. This is likely
/// to happen anyway as part of good software engineering practice.
///
/// We prepare for codegen by building a "prioritized trie", where the
/// trie associates input strings with priorities to output values.
/// Each input string is a sequence of match operators followed by an
/// "end of match" token, and each output is a sequence of ops that
/// build the output expression. Each input-output mapping is
/// associated with a priority. The goal of the trie is to generate a
/// decision-tree procedure that lets us execute match ops in a
/// deterministic way, eventually landing at a state that corresponds
/// to the highest-priority matching rule and can produce the output.
///
/// To build this trie, we construct nodes with edges to child nodes;
/// each edge consists of (i) one input token (a `PatternInst` or
/// EOM), and (ii) the minimum and maximum priorities of rules along
/// this edge. In a way this resembles an interval tree, though the
/// intervals of children need not be disjoint.
///
/// To add a rule to this trie, we perform the usual trie-insertion
/// logic, creating edges and subnodes where necessary, and updating
/// the priority-range of each edge that we traverse to include the
/// priority of the inserted rule.
///
/// However, we need to be a little bit careful, because with only
/// priority ranges in place and the potential for overlap, we have
/// something that resembles an NFA. For example, consider the case
/// where we reach a node in the trie and have two edges with two
/// match ops, one corresponding to a rule with priority 10, and the
/// other corresponding to two rules, with priorities 20 and 0. The
/// final match could lie along *either* path, so we have to traverse
/// both.
///
/// So, to avoid this, we perform a sort of moral equivalent to the
/// NFA-to-DFA conversion "on the fly" as we insert nodes by
/// duplicating subtrees. At any node, when inserting with a priority
/// P and when outgoing edges lie in a range [P_lo, P_hi] such that P
/// >= P_lo and P <= P_hi, we "priority-split the edges" at priority
/// P.
///
/// To priority-split the edges in a node at priority P:
///
/// - For each out-edge with priority [P_lo, P_hi] s.t. P \in [P_lo,
/// P_hi], and token T:
/// - Trim the subnode at P, yielding children C_lo and C_hi.
/// - Both children must be non-empty (have at least one leaf)
/// because the original node must have had a leaf at P_lo
/// and a leaf at P_hi.
/// - Replace the one edge with two edges, one for each child, with
/// the original match op, and with ranges calculated according to
/// the trimmed children.
///
/// To trim a node into range [P_lo, P_hi]:
///
/// - For a decision node:
/// - If any edges have a range outside the bounds of the trimming
/// range, trim the bounds of the edge, and trim the subtree under the
/// edge into the trimmed edge's range. If the subtree is trimmed
/// to `None`, remove the edge.
/// - If all edges are removed, the decision node becomes `None`.
/// - For a leaf node:
/// - If the priority is outside the range, the node becomes `None`.
///
/// As we descend a path to insert a leaf node, we (i) priority-split
/// if any edges' priority ranges overlap the insertion priority
/// range, and (ii) expand priority ranges on edges to include the new
/// leaf node's priority.
///
/// As long as we do this, we ensure the two key priority-trie
/// invariants:
///
/// 1. At a given node, no two edges exist with priority ranges R_1,
/// R_2 such that R_1 ∩ R_2 ≠ ∅, unless R_1 and R_2 are unit ranges
/// ([x, x]) and are on edges with different match-ops.
/// 2. Along the path from the root to any leaf node with priority P,
/// each edge has a priority range R such that P ∈ R.
///
/// Note that this means that multiple edges with a single match-op
/// may exist, with different priorities.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TrieSymbol {
    /// Run a match operation to continue matching a LHS.
    Match {
        /// The match operation to run.
        op: PatternInst,
    },
    /// We successfully matched a LHS.
    ///
    /// This acts as the end-of-input sentinel terminating every rule's
    /// linearized symbol sequence.
    EndOfMatch,
}
impl TrieSymbol {
    /// Is this symbol the end-of-match sentinel?
    fn is_eom(&self) -> bool {
        // `matches!` replaces the manual two-arm match; this file already
        // uses `matches!` elsewhere (see `TrieNode::is_empty`).
        matches!(self, TrieSymbol::EndOfMatch)
    }
}
/// A priority, as attached to a rule in the DSL source.
pub type Prio = i64;
/// An inclusive range of priorities.
///
/// Both endpoints are part of the range; `min == max` denotes a
/// single-priority ("unit") range.
#[derive(Clone, Copy, Debug)]
pub struct PrioRange {
    /// The minimum of this range.
    pub min: Prio,
    /// The maximum of this range.
    pub max: Prio,
}
impl PrioRange {
    /// Does `prio` fall within this inclusive range?
    fn contains(&self, prio: Prio) -> bool {
        (self.min..=self.max).contains(&prio)
    }

    /// Is this range a single point, i.e. `min == max`?
    fn is_unit(&self) -> bool {
        self.min == self.max
    }

    /// Do the two inclusive ranges share at least one point?
    fn overlaps(&self, other: PrioRange) -> bool {
        // Two inclusive ranges are disjoint exactly when one ends before
        // the other begins; overlap is the negation of that.
        !(self.max < other.min || other.max < self.min)
    }

    /// The largest range contained in both `self` and `other`.
    /// (Only meaningful when the ranges overlap.)
    fn intersect(&self, other: PrioRange) -> PrioRange {
        PrioRange {
            min: self.min.max(other.min),
            max: self.max.min(other.max),
        }
    }

    /// The smallest range containing both `self` and `other`.
    fn union(&self, other: PrioRange) -> PrioRange {
        PrioRange {
            min: self.min.min(other.min),
            max: self.max.max(other.max),
        }
    }

    /// Split this range into two non-empty halves around `prio`.
    ///
    /// When `prio` is strictly inside the range, the split is
    /// `([min, prio-1], [prio, max])`; when `prio == min` it is
    /// `([min, min], [min+1, max])` so that both halves stay non-empty.
    fn split_at(&self, prio: Prio) -> (PrioRange, PrioRange) {
        assert!(self.contains(prio));
        assert!(!self.is_unit());
        // First priority that belongs to the upper half.
        let upper_start = if prio == self.min { prio + 1 } else { prio };
        (
            PrioRange {
                min: self.min,
                max: upper_start - 1,
            },
            PrioRange {
                min: upper_start,
                max: self.max,
            },
        )
    }
}
/// An edge in our term trie.
#[derive(Clone, Debug)]
pub struct TrieEdge {
    /// The priority range for this edge's sub-trie.
    ///
    /// Inclusive on both ends; every leaf reachable through this edge has a
    /// priority within this range.
    pub range: PrioRange,
    /// The match operation to perform for this edge.
    pub symbol: TrieSymbol,
    /// This edge's sub-trie.
    pub node: TrieNode,
}
/// A node in the term trie.
#[derive(Clone, Debug)]
pub enum TrieNode {
    /// One or more patterns could match.
    ///
    /// Maybe one pattern already has matched, but there are more (higher
    /// priority and/or same priority but more specific) patterns that could
    /// still match.
    Decision {
        /// The child sub-tries that we can match from this point on.
        edges: Vec<TrieEdge>,
    },
    /// The successful match of an LHS pattern, and here is its RHS expression.
    Leaf {
        /// The priority of this rule.
        prio: Prio,
        /// The RHS expression to evaluate upon a successful LHS pattern match.
        output: ExprSequence,
    },
    /// No LHS pattern matches.
    Empty,
}
impl TrieNode {
    /// Is this node the `Empty` (no-match) node?
    fn is_empty(&self) -> bool {
        matches!(self, &TrieNode::Empty)
    }

    /// Insert one rule into the trie rooted at `self`.
    ///
    /// `input` is the rule's linearized sequence of match ops terminated by
    /// `TrieSymbol::EndOfMatch`; `output` is the op sequence that builds the
    /// rule's RHS; `prio` is the rule's priority. Returns `false` when the
    /// rule cannot be inserted because another rule already occupies the
    /// same position at an overlapping priority (two competing rules).
    fn insert(
        &mut self,
        prio: Prio,
        mut input: impl Iterator<Item = TrieSymbol>,
        output: ExprSequence,
    ) -> bool {
        // Take one input symbol. There must be *at least* one, EOM if
        // nothing else.
        let op = input
            .next()
            .expect("Cannot insert into trie with empty input sequence");
        let is_last = op.is_eom();
        // If we are empty, turn into a decision node.
        if self.is_empty() {
            *self = TrieNode::Decision { edges: vec![] };
        }
        // We must be a decision node.
        let edges = match self {
            &mut TrieNode::Decision { ref mut edges } => edges,
            _ => panic!("insert on leaf node!"),
        };
        // Do we need to split? We priority-split any edge whose non-unit
        // priority range contains the inserted priority, preserving the
        // trie invariants described in the module-level doc comment.
        let needs_split = edges
            .iter()
            .any(|edge| edge.range.contains(prio) && !edge.range.is_unit());
        // If so, pass over all edges/subnodes and split each.
        if needs_split {
            let mut new_edges = vec![];
            for edge in std::mem::take(edges) {
                if !edge.range.contains(prio) || edge.range.is_unit() {
                    // Edge unaffected by the split: keep as-is.
                    new_edges.push(edge);
                    continue;
                }
                // Split the edge's range at `prio`, then trim the subtree
                // into each half; a half whose subtree is empty is dropped.
                let (lo_range, hi_range) = edge.range.split_at(prio);
                let lo = edge.node.trim(lo_range);
                let hi = edge.node.trim(hi_range);
                if let Some((node, range)) = lo {
                    new_edges.push(TrieEdge {
                        range,
                        symbol: edge.symbol.clone(),
                        node,
                    });
                }
                if let Some((node, range)) = hi {
                    new_edges.push(TrieEdge {
                        range,
                        symbol: edge.symbol,
                        node,
                    });
                }
            }
            *edges = new_edges;
        }
        // Now find or insert the appropriate edge.
        let mut edge: Option<usize> = None;
        let mut last_edge_with_op: Option<usize> = None;
        let mut last_edge_with_op_prio: Option<Prio> = None;
        // Iterate one slot past the end so `i == edges.len()` acts as a
        // sentinel meaning "no more candidate edges".
        for i in 0..(edges.len() + 1) {
            if i == edges.len() || prio > edges[i].range.max {
                // We've passed all edges with overlapping priority
                // ranges. Maybe the last edge we saw with the op
                // we're inserting can have its range expanded,
                // however.
                if last_edge_with_op.is_some() {
                    // Move it to the end of the run of equal-unit-range ops.
                    edges.swap(last_edge_with_op.unwrap(), i - 1);
                    edge = Some(i - 1);
                    edges[i - 1].range.max = prio;
                    break;
                }
                // No reusable edge: insert a fresh unit-range [prio, prio]
                // edge for this op at the current position.
                edges.insert(
                    i,
                    TrieEdge {
                        range: PrioRange {
                            min: prio,
                            max: prio,
                        },
                        symbol: op.clone(),
                        node: TrieNode::Empty,
                    },
                );
                edge = Some(i);
                break;
            }
            // NOTE(review): this check looks unreachable -- the branch above
            // already handles `i == edges.len()` and both of its paths
            // `break`. Confirm before removing.
            if i == edges.len() {
                break;
            }
            if edges[i].symbol == op {
                // Remember the most recent edge carrying the same match op.
                last_edge_with_op = Some(i);
                last_edge_with_op_prio = Some(edges[i].range.max);
            }
            // An intervening edge with a higher max priority means the
            // remembered edge can no longer be extended past it.
            if last_edge_with_op_prio.is_some()
                && last_edge_with_op_prio.unwrap() < edges[i].range.max
            {
                last_edge_with_op = None;
                last_edge_with_op_prio = None;
            }
            // Exact fit: an existing edge with this op whose range already
            // covers `prio` -- descend into it.
            if edges[i].range.contains(prio) && edges[i].symbol == op {
                edge = Some(i);
                break;
            }
        }
        let edge = edge.expect("Must have found an edge at least at last iter");
        let edge = &mut edges[edge];
        if is_last {
            if !edge.node.is_empty() {
                // If a leaf node already exists at an overlapping
                // prio for this op, there are two competing rules, so
                // we can't insert this one.
                return false;
            }
            edge.node = TrieNode::Leaf { prio, output };
            true
        } else {
            edge.node.insert(prio, input, output)
        }
    }

    /// Restrict this subtree to the given priority range.
    ///
    /// Returns the trimmed subtree together with the (possibly narrowed)
    /// range it actually covers, or `None` if nothing in the subtree falls
    /// inside `range`.
    fn trim(&self, range: PrioRange) -> Option<(TrieNode, PrioRange)> {
        match self {
            &TrieNode::Empty => None,
            &TrieNode::Leaf { prio, ref output } => {
                // A leaf survives iff its priority lies in the range; its
                // covered range collapses to the unit range [prio, prio].
                if range.contains(prio) {
                    Some((
                        TrieNode::Leaf {
                            prio,
                            output: output.clone(),
                        },
                        PrioRange {
                            min: prio,
                            max: prio,
                        },
                    ))
                } else {
                    None
                }
            }
            &TrieNode::Decision { ref edges } => {
                // Recursively trim each child edge, dropping any edge whose
                // subtree becomes empty.
                let edges = edges
                    .iter()
                    .filter_map(|edge| {
                        if !edge.range.overlaps(range) {
                            None
                        } else {
                            let range = range.intersect(edge.range);
                            if let Some((node, range)) = edge.node.trim(range) {
                                Some(TrieEdge {
                                    range,
                                    symbol: edge.symbol.clone(),
                                    node,
                                })
                            } else {
                                None
                            }
                        }
                    })
                    .collect::<Vec<_>>();
                if edges.is_empty() {
                    None
                } else {
                    // The node's covered range is the union of its
                    // surviving edges' ranges.
                    let range = edges
                        .iter()
                        .map(|edge| edge.range)
                        .reduce(|a, b| a.union(b))
                        .expect("reduce on non-empty vec must not return None");
                    Some((TrieNode::Decision { edges }, range))
                }
            }
        }
    }

    /// Get a pretty-printed version of this trie, for debugging.
    pub fn pretty(&self) -> String {
        let mut s = String::new();
        pretty_rec(&mut s, self, "");
        return s;

        // Recursive helper: append `node`'s rendering to `s`, prefixing
        // each line with `indent`.
        fn pretty_rec(s: &mut String, node: &TrieNode, indent: &str) {
            match node {
                TrieNode::Decision { edges } => {
                    s.push_str(indent);
                    s.push_str("TrieNode::Decision:\n");
                    let new_indent = indent.to_owned() + " ";
                    for edge in edges {
                        s.push_str(indent);
                        s.push_str(&format!(
                            " edge: range = {:?}, symbol: {:?}\n",
                            edge.range, edge.symbol
                        ));
                        pretty_rec(s, &edge.node, &new_indent);
                    }
                }
                TrieNode::Empty | TrieNode::Leaf { .. } => {
                    s.push_str(indent);
                    s.push_str(&format!("{:?}\n", node));
                }
            }
        }
    }
}
/// Builder context for one function in generated code corresponding
/// to one root input term.
///
/// A `TermFunctionBuilder` can correspond to the matching
/// control-flow and operations that we execute either when evaluating
/// *forward* on a term, trying to match left-hand sides against it
/// and transforming it into another term; or *backward* on a term,
/// trying to match another rule's left-hand side against an input to
/// produce the term in question (when the term is used in the LHS of
/// the calling term).
#[derive(Debug)]
struct TermFunctionBuilder {
    /// The prioritized trie of all rules whose LHS is rooted at this term.
    trie: TrieNode,
}
impl TermFunctionBuilder {
    /// Create a builder with an initially empty trie.
    fn new() -> Self {
        Self {
            trie: TrieNode::Empty,
        }
    }

    /// Insert one lowered rule into the trie: its linearized pattern ops,
    /// terminated by the end-of-match sentinel, mapped to its RHS op
    /// sequence at priority `prio`.
    fn add_rule(&mut self, prio: Prio, pattern_seq: PatternSequence, expr_seq: ExprSequence) {
        let match_ops = pattern_seq
            .insts
            .into_iter()
            .map(|op| TrieSymbol::Match { op });
        let terminated = match_ops.chain(std::iter::once(TrieSymbol::EndOfMatch));
        self.trie.insert(prio, terminated, expr_seq);
    }
}
/// Builds one `TermFunctionBuilder` (and hence one trie) per root term,
/// covering every rule in the term environment.
#[derive(Debug)]
struct TermFunctionsBuilder<'a> {
    /// Type environment; passed to `lower_rule` and traced for debugging.
    typeenv: &'a TypeEnv,
    /// Term environment holding the rules to compile.
    termenv: &'a TermEnv,
    /// One trie builder per root term, keyed (and ordered) by `TermId`.
    builders_by_term: BTreeMap<TermId, TermFunctionBuilder>,
}
impl<'a> TermFunctionsBuilder<'a> {
    /// Create a builder over the given type and term environments.
    fn new(typeenv: &'a TypeEnv, termenv: &'a TermEnv) -> Self {
        log::trace!("typeenv: {:?}", typeenv);
        log::trace!("termenv: {:?}", termenv);
        Self {
            builders_by_term: BTreeMap::new(),
            typeenv,
            termenv,
        }
    }

    /// Lower every rule and insert it into the trie builder for its LHS
    /// root term.
    fn build(&mut self) {
        for rule in 0..self.termenv.rules.len() {
            let rule = RuleId(rule);
            // Rules without an explicit priority default to 0.
            let prio = self.termenv.rules[rule.index()].prio.unwrap_or(0);
            let (pattern, expr) = lower_rule(self.typeenv, self.termenv, rule);
            let root_term = self.termenv.rules[rule.index()].lhs.root_term().unwrap();
            log::trace!(
                "build:\n- rule {:?}\n- pattern {:?}\n- expr {:?}",
                self.termenv.rules[rule.index()],
                pattern,
                expr
            );
            // `pattern` and `expr` are moved here rather than cloned:
            // nothing uses them after this point, so the previous
            // `.clone()` calls were redundant allocations per rule.
            self.builders_by_term
                .entry(root_term)
                .or_insert_with(TermFunctionBuilder::new)
                .add_rule(prio, pattern, expr);
        }
    }

    /// Consume the builder, yielding the finished trie for each root term.
    fn finalize(self) -> BTreeMap<TermId, TrieNode> {
        self.builders_by_term
            .into_iter()
            .map(|(term, builder)| (term, builder.trie))
            .collect()
    }
}

View File

@@ -0,0 +1,17 @@
;; Test fixture: a term (`isub`) with both an internal constructor and an
;; internal extractor, and a term (`value_array_2`) with an external
;; constructor and an external (infallible) extractor.
(type i32 (primitive i32))
(type B (enum (B (x i32) (y i32))))
;; `isub` has a constructor and extractor.
(decl isub (i32 i32) B)
(rule (isub x y)
      (B.B x y))
(extractor (isub x y)
           (B.B x y))
;; `value_array_2` has both an external extractor and an external constructor.
(type Value (primitive Value))
(type ValueArray2 extern (enum))
(decl value_array_2 (Value Value) ValueArray2)
(extern extractor infallible value_array_2 unpack_value_array_2)
(extern constructor value_array_2 pack_value_array_2)

View File

@@ -0,0 +1,36 @@
;; NOTE(review): this fixture references unbound variables (`=q` without a
;; prior binding of `q`; `y` on the RHS) and returns `R.A` where `Lower2` is
;; declared to return `MachInst` -- it appears to be a deliberate
;; compile-fail test input. Confirm before "fixing" these.
(type u32 (primitive u32))
(type bool (primitive bool))
(type A (enum (A1 (x u32))))
(decl Ext1 (u32) A)
(decl Ext2 (u32) A)
(extern extractor Ext1 ext1)
(extern extractor Ext2 ext2)
(decl C (bool) A)
(extern constructor C c)
(decl Lower (A) A)
(rule
  (Lower
    (and
      a
      (Ext1 x)
      (Ext2 =q)))
  (C y))
(type R (enum (A (x u32))))
(type Opcode (enum A B C))
(type MachInst (enum D E F))
(decl Lower2 (Opcode) MachInst)
(rule
  (Lower2 (Opcode.A))
  (R.A (Opcode.A)))
(rule
  (Lower2 (Opcode.B))
  (MachInst.E))
(rule
  (Lower2 (Opcode.C))
  (MachInst.F))

View File

@@ -0,0 +1,21 @@
;; Test fixture: `let` bindings in rule right-hand sides, invoking external
;; constructors (`Add`, `Sub`) and binding their results to typed variables.
(type u32 (primitive u32))
(type A (enum (Add (x u32) (y u32)) (Sub (x u32) (y u32))))
(type B (enum (B (z u32))))
(decl Sub (u32 u32) u32)
(extern constructor Sub sub)
(decl Add (u32 u32) u32)
(extern constructor Add add)
(decl Lower (A) B)
(rule
  (Lower (A.Add x y))
  (let ((z u32 (Add x y)))
    (B.B z)))
(rule
  (Lower (A.Sub x y))
  (let ((z u32 (Sub x y)))
    (B.B z)))

View File

@@ -0,0 +1,21 @@
;; Test fixture: `@` capture patterns (binding a sub-pattern's matched value
;; to a variable) and an internal extractor used inside a rule LHS.
(type u32 (primitive u32))
(type A (enum (A1 (x u32)) (A2 (x u32))))
(type B (enum (B1 (x u32)) (B2 (x u32))))
(decl Input (A) u32)
(extern extractor Input get_input) ;; fn get_input<C>(ctx: &mut C, ret: u32) -> Option<(A,)>
(decl Lower (A) B)
(rule
  (Lower (A.A1 sub @ (Input (A.A2 42))))
  (B.B2 sub))
(decl Extractor (B) A)
(extractor
  (Extractor x)
  (A.A2 x))
(rule
  (Lower (Extractor b))
  (B.B1 b))

View File

@@ -0,0 +1,24 @@
;; Test fixture: explicit rule priorities. Rules carry priorities 1, 0, 0,
;; and -1; the negative-priority rule acts as a catch-all fallback.
(type u32 (primitive u32))
(type A (enum
          (A1 (x B) (y B))))
(type B (enum
          (B1 (x u32))
          (B2 (x u32))))
(decl A2B (A) B)
(rule 1
  (A2B (A.A1 _ (B.B1 x)))
  (B.B1 x))
(rule 0
  (A2B (A.A1 (B.B1 x) _))
  (B.B1 x))
(rule 0
  (A2B (A.A1 (B.B2 x) _))
  (B.B1 x))
(rule -1
  (A2B (A.A1 _ _))
  (B.B1 42))

View File

@@ -0,0 +1,66 @@
;; Test fixture: a small instruction-lowering example with external
;; extractors (`Op`, `InstInput`, `Producer`), an external constructor
;; (`UseInput`), and internal extractors used as syntax sugar.
(type Opcode extern (enum
                      Iadd
                      Isub
                      Load
                      Store))
(type Inst (primitive Inst))
(type InstInput (primitive InstInput))
(type Reg (primitive Reg))
(type u32 (primitive u32))
(decl Op (Opcode) Inst)
(extern extractor infallible Op get_opcode)
(decl InstInput (InstInput u32) Inst)
(extern extractor infallible InstInput get_inst_input (out in))
(decl Producer (Inst) InstInput)
(extern extractor Producer get_input_producer)
(decl UseInput (InstInput) Reg)
(extern constructor UseInput put_input_in_reg)
(type MachInst (enum
                 (Add (a Reg) (b Reg))
                 (Add3 (a Reg) (b Reg) (c Reg))
                 (Sub (a Reg) (b Reg))))
(decl Lower (Inst) MachInst)
;; Extractors that give syntax sugar for (Iadd ra rb), etc.
;;
;; Note that this is somewhat simplistic: it directly connects inputs to
;; MachInst regs; really we'd want to return a VReg or InstInput that we can use
;; another extractor to connect to another (producer) inst.
;;
;; Also, note that while it looks a little indirect, a verification effort could
;; define equivalences across the `rule` LHS/RHS pairs, and the types ensure that
;; we are dealing (at the semantic level) with pure value equivalences of
;; "terms", not arbitrary side-effecting calls.
(decl Iadd (InstInput InstInput) Inst)
(decl Isub (InstInput InstInput) Inst)
(extractor
  (Iadd a b)
  (and
    (Op (Opcode.Iadd))
    (InstInput a <0)
    (InstInput b <1)))
(extractor
  (Isub a b)
  (and
    (Op (Opcode.Isub))
    (InstInput a <0)
    (InstInput b <1)))
;; Now the nice syntax-sugar that "end-user" backend authors can write:
(rule
  (Lower (Iadd ra rb))
  (MachInst.Add (UseInput ra) (UseInput rb)))
(rule
  (Lower (Iadd (Producer (Iadd ra rb)) rc))
  (MachInst.Add3 (UseInput ra) (UseInput rb) (UseInput rc)))
(rule
  (Lower (Isub ra rb))
  (MachInst.Sub (UseInput ra) (UseInput rb)))

View File

@@ -0,0 +1,42 @@
;; Test fixture: external constants (`$A`, `$B`), equality patterns (`=x`
;; rebinding against an already-bound variable), and the `#t` boolean literal.
(type u32 (primitive u32))
(type bool (primitive bool))
(type A (enum (A1 (x u32))))
(decl Ext1 (u32) A)
(decl Ext2 (u32) A)
(extern extractor Ext1 ext1)
(extern extractor Ext2 ext2)
(extern const $A u32)
(extern const $B u32)
(decl C (bool) A)
(extern constructor C c)
(decl Lower (A) A)
(rule
  (Lower
    (and
      a
      (Ext1 x)
      (Ext2 =x)))
  (C #t))
(type Opcode (enum A B C))
(type MachInst (enum D E F))
(decl Lower2 (Opcode) MachInst)
(rule
  (Lower2 (Opcode.A))
  (MachInst.D))
(rule
  (Lower2 (Opcode.B))
  (MachInst.E))
(rule
  (Lower2 (Opcode.C))
  (MachInst.F))
(decl F (Opcode) u32)
(rule
  (F _)
  $B)

View File

@@ -0,0 +1,12 @@
// Driver for an ISLE-generated `test` module: implements the generated
// context trait and invokes the generated constructor once.
mod test;
// Unit struct implementing the generated `Context` trait.
struct Context;
impl test::Context for Context {
    // External extractor hook: for any input value, yields `A1 { x: x + 1 }`.
    fn get_input(&mut self, x: u32) -> Option<(test::A,)> {
        Some((test::A::A1 { x: x + 1 },))
    }
}
fn main() {
    // `&mut Context` takes a mutable reference to the unit-struct value.
    test::constructor_Lower(&mut Context, &test::A::A1 { x: 42 });
}

View File

@@ -0,0 +1,96 @@
;;;; Type Definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Declare that we are using the `i32` primitive type from Rust.
(type i32 (primitive i32))
;; Our high-level, RISC-y input IR.
(type HighLevelInst
(enum (Add (a Value) (b Value))
(Load (addr Value))
(Const (c i32))))
;; A value in our high-level IR is a Rust `Copy` type. Values are either defined
;; by an instruction, or are a basic block argument.
(type Value (primitive Value))
;; Our low-level, CISC-y machine instructions.
(type LowLevelInst
(enum (Add (mode AddrMode))
(Load (offset i32) (addr Reg))
(Const (c i32))))
;; Different kinds of addressing modes for operands to our low-level machine
;; instructions.
(type AddrMode
(enum
;; Both operands in registers.
(RegReg (a Reg) (b Reg))
;; The destination/first operand is a register; the second operand is in
;; memory at `[b + offset]`.
(RegMem (a Reg) (b Reg) (offset i32))
;; The destination/first operand is a register, second operand is an
;; immediate.
(RegImm (a Reg) (imm i32))))
;; The register type is a Rust `Copy` type.
(type Reg (primitive Reg))
;;;; Rules ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Declare our top-level lowering function. We will attach rules to this
;; declaration for lowering various patterns of `HighLevelInst` inputs.
(decl lower (HighLevelInst) LowLevelInst)
;; Simple rule for lowering constants.
(rule (lower (HighLevelInst.Const c))
(LowLevelInst.Const c))
;; Declare an external constructor that puts a high-level `Value` into a
;; low-level `Reg`.
(decl put_in_reg (Value) Reg)
(extern constructor put_in_reg put_in_reg)
;; Simple rule for lowering adds.
(rule (lower (HighLevelInst.Add a b))
(LowLevelInst.Add
(AddrMode.RegReg (put_in_reg a) (put_in_reg b))))
;; Simple rule for lowering loads.
(rule (lower (HighLevelInst.Load addr))
(LowLevelInst.Load 0 (put_in_reg addr)))
;; Declare an external extractor for extracting the instruction that defined a
;; given operand value.
(decl inst_result (HighLevelInst) Value)
(extern extractor inst_result inst_result)
;; Rule to sink loads into adds.
(rule (lower (HighLevelInst.Add a (inst_result (HighLevelInst.Load addr))))
(LowLevelInst.Add
(AddrMode.RegMem (put_in_reg a)
(put_in_reg addr)
0)))
;; Rule to sink a load of a base address with a static offset into a single add.
(rule (lower (HighLevelInst.Add
a
(inst_result (HighLevelInst.Load
(inst_result (HighLevelInst.Add
base
(inst_result (HighLevelInst.Const offset))))))))
(LowLevelInst.Add
(AddrMode.RegMem (put_in_reg a)
(put_in_reg base)
offset)))
;; Rule for sinking an immediate into an add.
(rule (lower (HighLevelInst.Add a (inst_result (HighLevelInst.Const c))))
(LowLevelInst.Add
(AddrMode.RegImm (put_in_reg a) c)))
;; Rule for lowering loads of a base address with a static offset.
(rule (lower (HighLevelInst.Load
(inst_result (HighLevelInst.Add
base
(inst_result (HighLevelInst.Const offset))))))
(LowLevelInst.Load offset (put_in_reg base)))

View File

@@ -0,0 +1,14 @@
# Manifest for `islec`, the command-line driver for the ISLE DSL compiler.
[package]
name = "islec"
version = "0.1.0"
authors = ["The Cranelift Project Developers"]
edition = "2018"
license = "Apache-2.0 WITH LLVM-exception"
# Internal tool; never published to crates.io.
publish = false
[dependencies]
log = "0.4"
# The ISLE compiler library itself, referenced by path.
isle = { version = "*", path = "../isle/" }
env_logger = { version = "0.8", default-features = false }
# Fancy diagnostic rendering for compile errors (see main.rs).
miette = { version = "3.0.0", features = ["fancy"] }
structopt = "0.3.23"

View File

@@ -0,0 +1,63 @@
use isle::{compile, lexer, parser};
use miette::{Context, IntoDiagnostic, Result};
use std::{
fs,
io::{self, Write},
path::PathBuf,
};
use structopt::StructOpt;
// Command-line options, parsed via `structopt`. NB: the `///` doc comments
// below double as the generated `--help` text, so keep them user-facing.
#[derive(StructOpt)]
struct Opts {
    /// The output file to write the generated Rust code to. `stdout` is used if
    /// this is not given.
    #[structopt(short, long, parse(from_os_str))]
    output: Option<PathBuf>,
    /// The input ISLE DSL source files.
    #[structopt(parse(from_os_str))]
    inputs: Vec<PathBuf>,
}
/// Entry point: lex, parse, and compile the given ISLE sources, writing the
/// generated Rust code to the chosen output file or to stdout.
fn main() -> Result<()> {
    let _ = env_logger::try_init();
    let _ = miette::set_hook(Box::new(|_| {
        Box::new(
            miette::MietteHandlerOpts::new()
                // `miette` mistakenly uses braille-optimized output for emacs's
                // `M-x shell`.
                .force_graphical(true)
                .build(),
        )
    }));

    let opts = Opts::from_args();

    // Lex, parse, and compile in sequence; any error propagates out as a
    // miette diagnostic.
    let defs = parser::parse(lexer::Lexer::from_files(opts.inputs)?)?;
    let code = compile::compile(&defs)?;

    // Keep the stdout handle alive for the lifetime of the lock below.
    let stdout = io::stdout();
    let (mut sink, sink_name): (Box<dyn Write>, String) = if let Some(path) = &opts.output {
        let file = fs::File::create(path)
            .into_diagnostic()
            .with_context(|| format!("failed to create '{}'", path.display()))?;
        (Box::new(file), path.display().to_string())
    } else {
        (Box::new(stdout.lock()), "<stdout>".to_string())
    };

    sink.write_all(code.as_bytes())
        .into_diagnostic()
        .with_context(|| format!("failed to write to '{}'", sink_name))?;
    Ok(())
}