moved crates in lib/ to src/, renamed crates, modified some files' text (#660)

This commit is contained in:
lazypassion
2019-01-28 18:56:54 -05:00
committed by Dan Gohman
parent 54959cf5bb
commit 747ad3c4c5
508 changed files with 94 additions and 92 deletions

View File

@@ -0,0 +1,222 @@
//! Common helper code for ABI lowering.
//!
//! This module provides functions and data structures that are useful for implementing the
//! `TargetIsa::legalize_signature()` method.
use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
use core::cmp::Ordering;
use std::vec::Vec;
/// Legalization action to perform on a single argument or return value when converting a
/// signature.
///
/// An argument may go through a sequence of legalization steps before it reaches the final
/// `Assign` action.
#[derive(Clone, Copy, Debug)]
pub enum ArgAction {
/// Assign the argument to the given location.
Assign(ArgumentLoc),
/// Convert the argument, then call again.
///
/// This action can split an integer type into two smaller integer arguments, or it can split a
/// SIMD vector into halves.
Convert(ValueConversion),
}
impl From<ArgumentLoc> for ArgAction {
fn from(x: ArgumentLoc) -> Self {
ArgAction::Assign(x)
}
}
impl From<ValueConversion> for ArgAction {
fn from(x: ValueConversion) -> Self {
ArgAction::Convert(x)
}
}
/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValueConversion {
/// Split an integer type into low and high parts, using `isplit`.
IntSplit,
/// Split a vector type into halves with identical lane types, using `vsplit`.
VectorSplit,
/// Bit-cast to an integer type of the same size.
IntBits,
/// Sign-extend integer value to the required type.
Sext(Type),
/// Unsigned zero-extend value to the required type.
Uext(Type),
}
impl ValueConversion {
/// Apply this conversion to a type, returning the converted type.
pub fn apply(self, ty: Type) -> Type {
match self {
ValueConversion::IntSplit => ty.half_width().expect("Integer type too small to split"),
ValueConversion::VectorSplit => ty.half_vector().expect("Not a vector"),
ValueConversion::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
ValueConversion::Sext(nty) | ValueConversion::Uext(nty) => nty,
}
}
/// Is this a split conversion that results in two arguments?
pub fn is_split(self) -> bool {
match self {
ValueConversion::IntSplit | ValueConversion::VectorSplit => true,
_ => false,
}
}
}
/// Common trait for assigning arguments to registers or stack locations.
///
/// This will be implemented by individual ISAs.
pub trait ArgAssigner {
/// Pick an assignment action for function argument (or return value) `arg`.
fn assign(&mut self, arg: &AbiParam) -> ArgAction;
}
/// Legalize the arguments in `args` using the given argument assigner.
///
/// This function can be used for both arguments and return values.
pub fn legalize_args<AA: ArgAssigner>(args: &mut Vec<AbiParam>, aa: &mut AA) {
// Iterate over the arguments.
// We may need to mutate the vector in place, so don't use a normal iterator, and clone the
// argument to avoid holding a reference.
let mut argno = 0;
while let Some(arg) = args.get(argno).cloned() {
// Leave the pre-assigned arguments alone.
// We'll assume that they don't interfere with our assignments.
if arg.location.is_assigned() {
argno += 1;
continue;
}
match aa.assign(&arg) {
// Assign argument to a location and move on to the next one.
ArgAction::Assign(loc) => {
args[argno].location = loc;
argno += 1;
}
// Convert the argument, possibly splitting it into two smaller ones, then revisit.
ArgAction::Convert(conv) => {
let value_type = conv.apply(arg.value_type);
let new_arg = AbiParam { value_type, ..arg };
args[argno].value_type = value_type;
if conv.is_split() {
args.insert(argno + 1, new_arg);
}
}
}
}
}
/// Determine the right action to take when passing a `have` value type to a call signature where
/// the next argument is `arg`, which has a different value type.
///
/// The signature legalization process in `legalize_args` above can replace a single argument value
/// with multiple arguments of smaller types. It can also change the type of an integer argument to
/// a larger integer type, requiring the smaller value to be sign- or zero-extended.
///
/// The legalizer needs to repair the values at all ABI boundaries:
///
/// - Incoming function arguments to the entry EBB.
/// - Function arguments passed to a call.
/// - Return values from a call.
/// - Return values passed to a return instruction.
///
/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer
/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value
/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type
/// for the argument.
///
/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the
/// desired argument type appears. This will happen when a vector or integer type needs to be split
/// more than once, for example.
pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
let have_bits = have.bits();
let arg_bits = arg.value_type.bits();
match have_bits.cmp(&arg_bits) {
// We have fewer bits than the ABI argument.
Ordering::Less => {
debug_assert!(
have.is_int() && arg.value_type.is_int(),
"Can only extend integer values"
);
match arg.extension {
ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type),
ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type),
_ => panic!("No argument extension specified"),
}
}
// We have the same number of bits as the argument.
Ordering::Equal => {
// This must be an integer vector that is split and then extended.
debug_assert!(arg.value_type.is_int());
debug_assert!(have.is_vector(), "expected vector type, got {}", have);
ValueConversion::VectorSplit
}
// We have more bits than the argument.
Ordering::Greater => {
if have.is_vector() {
ValueConversion::VectorSplit
} else if have.is_float() {
// Convert a float to int so it can be split the next time.
// ARM would do this to pass an `f64` in two registers.
ValueConversion::IntBits
} else {
ValueConversion::IntSplit
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::types;
use crate::ir::AbiParam;
#[test]
fn legalize() {
let mut arg = AbiParam::new(types::I32);
assert_eq!(
legalize_abi_value(types::I64X2, &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I64, &arg),
ValueConversion::IntSplit
);
// Vector of integers is broken down, then sign-extended.
arg.extension = ArgumentExtension::Sext;
assert_eq!(
legalize_abi_value(types::I16X4, &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I16.by(2).unwrap(), &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I16, &arg),
ValueConversion::Sext(types::I32)
);
// 64-bit float is split as an integer.
assert_eq!(
legalize_abi_value(types::F64, &arg),
ValueConversion::IntBits
);
}
}
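To make the `ArgAssigner` contract concrete, here is a minimal sketch using the items defined above; `ToyAssigner`, its register numbering, and the missing stack fallback are illustrative assumptions, not taken from any real ISA backend:

```rust
/// Hypothetical assigner: hands out consecutive register units and asks
/// for any integer wider than 32 bits to be split and revisited.
struct ToyAssigner {
    next_reg: u16,
}

impl ArgAssigner for ToyAssigner {
    fn assign(&mut self, arg: &AbiParam) -> ArgAction {
        if arg.value_type.is_int() && arg.value_type.bits() > 32 {
            // `legalize_args` applies the split, then calls us again.
            return ValueConversion::IntSplit.into();
        }
        let loc = ArgumentLoc::Reg(self.next_reg);
        self.next_reg += 1;
        loc.into()
    }
}

#[test]
fn toy_assigner_splits_i64() {
    use crate::ir::types;
    let mut args = vec![AbiParam::new(types::I64)];
    legalize_args(&mut args, &mut ToyAssigner { next_reg: 0 });
    // The `i64` was split into two `i32` halves, each in its own register.
    assert_eq!(args.len(), 2);
    assert!(args.iter().all(|a| a.location.is_assigned()));
}
```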

View File

@@ -0,0 +1,145 @@
//! Code sink that writes binary machine code into contiguous memory.
//!
//! The `CodeSink` trait is the most general way of extracting binary machine code from Cranelift,
//! and it is implemented by things like the `test binemit` file test driver to generate
//! hexadecimal machine code. The `CodeSink` has some undesirable performance properties because of
//! the dual abstraction: `TargetIsa` is a trait object implemented by each supported ISA, so it
//! can't have any generic functions that could be specialized for each `CodeSink` implementation.
//! This results in many virtual function callbacks (one per `put*` call) when
//! `TargetIsa::emit_inst()` is used.
//!
//! The `MemoryCodeSink` type fixes the performance problem because it is a type known to
//! `TargetIsa` so it can specialize its machine code generation for the type. The trade-off is
//! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any
//! relocations to a `RelocSink` trait object. Relocations are less frequent than the
//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.
use super::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
use core::ptr::write_unaligned;
/// A `CodeSink` that writes binary machine code directly into memory.
///
/// A `MemoryCodeSink` object should be used when emitting a Cranelift IR function into executable
/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make
/// sure to allocate enough memory for the whole function. The number of bytes required is returned
/// by the `Context::compile()` function.
///
/// Any relocations in the function are forwarded to the `RelocSink` trait object.
///
/// Note that `MemoryCodeSink` writes multi-byte values in the native byte order of the host. This
/// is not the right thing to do for cross compilation.
pub struct MemoryCodeSink<'a> {
data: *mut u8,
offset: isize,
/// Size of the machine code portion of the output.
pub code_size: isize,
relocs: &'a mut RelocSink,
traps: &'a mut TrapSink,
}
impl<'a> MemoryCodeSink<'a> {
/// Create a new memory code sink that writes a function to the memory pointed to by `data`.
///
/// This function is unsafe since `MemoryCodeSink` does not perform bounds checking on the
/// memory buffer, and it can't guarantee that the `data` pointer is valid.
pub unsafe fn new(data: *mut u8, relocs: &'a mut RelocSink, traps: &'a mut TrapSink) -> Self {
Self {
data,
offset: 0,
code_size: 0,
relocs,
traps,
}
}
}
/// A trait for receiving relocations for code that is emitted directly into memory.
pub trait RelocSink {
/// Add a relocation referencing an EBB at the current offset.
fn reloc_ebb(&mut self, _: CodeOffset, _: Reloc, _: CodeOffset);
/// Add a relocation referencing an external symbol at the current offset.
fn reloc_external(&mut self, _: CodeOffset, _: Reloc, _: &ExternalName, _: Addend);
/// Add a relocation referencing a jump table.
fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable);
}
/// A trait for receiving trap codes and offsets.
///
/// If you don't need information about possible traps, you can use the
/// [`NullTrapSink`](binemit/struct.NullTrapSink.html) implementation.
pub trait TrapSink {
/// Add trap information for a specific offset.
fn trap(&mut self, _: CodeOffset, _: SourceLoc, _: TrapCode);
}
impl<'a> CodeSink for MemoryCodeSink<'a> {
fn offset(&self) -> CodeOffset {
self.offset as CodeOffset
}
fn put1(&mut self, x: u8) {
unsafe {
write_unaligned(self.data.offset(self.offset), x);
}
self.offset += 1;
}
fn put2(&mut self, x: u16) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u16, x);
}
self.offset += 2;
}
fn put4(&mut self, x: u32) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u32, x);
}
self.offset += 4;
}
fn put8(&mut self, x: u64) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u64, x);
}
self.offset += 8;
}
fn reloc_ebb(&mut self, rel: Reloc, ebb_offset: CodeOffset) {
let ofs = self.offset();
self.relocs.reloc_ebb(ofs, rel, ebb_offset);
}
fn reloc_external(&mut self, rel: Reloc, name: &ExternalName, addend: Addend) {
let ofs = self.offset();
self.relocs.reloc_external(ofs, rel, name, addend);
}
fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) {
let ofs = self.offset();
self.relocs.reloc_jt(ofs, rel, jt);
}
fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) {
let ofs = self.offset();
self.traps.trap(ofs, srcloc, code);
}
fn begin_rodata(&mut self) {
self.code_size = self.offset;
}
}
/// A `TrapSink` implementation that does nothing, which is convenient when
/// compiling code that does not rely on trapping semantics.
pub struct NullTrapSink {}
impl TrapSink for NullTrapSink {
fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {}
}
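As a usage sketch (not part of this commit), the two sink traits can be backed by simple collectors, and the unsafe constructor is paired with an up-front allocation whose size would come from an earlier `Context::compile` call:

```rust
/// Records every relocation it is handed; purely illustrative.
struct CollectRelocs(Vec<(CodeOffset, Reloc)>);

impl RelocSink for CollectRelocs {
    fn reloc_ebb(&mut self, at: CodeOffset, rel: Reloc, _ebb: CodeOffset) {
        self.0.push((at, rel));
    }
    fn reloc_external(&mut self, at: CodeOffset, rel: Reloc, _: &ExternalName, _: Addend) {
        self.0.push((at, rel));
    }
    fn reloc_jt(&mut self, at: CodeOffset, rel: Reloc, _: JumpTable) {
        self.0.push((at, rel));
    }
}

fn make_sink(code_size: usize) {
    // Allocate the whole buffer first; `MemoryCodeSink` does no bounds
    // checking, which is why its constructor is `unsafe`.
    let mut code = vec![0u8; code_size];
    let mut relocs = CollectRelocs(Vec::new());
    let mut traps = NullTrapSink {};
    let _sink = unsafe { MemoryCodeSink::new(code.as_mut_ptr(), &mut relocs, &mut traps) };
    // `_sink` would now be handed to `TargetIsa::emit_function_to_memory`.
}
```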

View File

@@ -0,0 +1,140 @@
//! Binary machine code emission.
//!
//! The `binemit` module contains code for translating Cranelift's intermediate representation into
//! binary machine code.
mod memorysink;
mod relaxation;
mod shrink;
pub use self::memorysink::{MemoryCodeSink, NullTrapSink, RelocSink, TrapSink};
pub use self::relaxation::relax_branches;
pub use self::shrink::shrink_instructions;
pub use crate::regalloc::RegDiversions;
use crate::ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
use core::fmt;
/// Offset in bytes from the beginning of the function.
///
/// Cranelift can be used as a cross compiler, so we don't want to use a type like `usize` which
/// depends on the *host* platform, not the *target* platform.
pub type CodeOffset = u32;
/// Addend to add to the symbol value.
pub type Addend = i64;
/// Relocation kinds for every ISA.
#[derive(Copy, Clone, Debug)]
pub enum Reloc {
/// absolute 4-byte
Abs4,
/// absolute 8-byte
Abs8,
/// x86 PC-relative 4-byte
X86PCRel4,
/// x86 call to PC-relative 4-byte
X86CallPCRel4,
/// x86 call to PLT-relative 4-byte
X86CallPLTRel4,
/// x86 GOT PC-relative 4-byte
X86GOTPCRel4,
/// Arm32 call target
Arm32Call,
/// Arm64 call target
Arm64Call,
/// RISC-V call target
RiscvCall,
}
impl fmt::Display for Reloc {
/// The `Display` implementation drops the arch, since it's used in contexts where the arch is
/// already unambiguous, e.g. clif syntax with the ISA specified. In other contexts, use `Debug`.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Reloc::Abs4 => write!(f, "Abs4"),
Reloc::Abs8 => write!(f, "Abs8"),
Reloc::X86PCRel4 => write!(f, "PCRel4"),
Reloc::X86CallPCRel4 => write!(f, "CallPCRel4"),
Reloc::X86CallPLTRel4 => write!(f, "CallPLTRel4"),
Reloc::X86GOTPCRel4 => write!(f, "GOTPCRel4"),
Reloc::Arm32Call | Reloc::Arm64Call | Reloc::RiscvCall => write!(f, "Call"),
}
}
}
/// Abstract interface for adding bytes to the code segment.
///
/// A `CodeSink` will receive all of the machine code for a function. It also accepts relocations
/// which are locations in the code section that need to be fixed up when linking.
pub trait CodeSink {
/// Get the current position.
fn offset(&self) -> CodeOffset;
/// Add 1 byte to the code section.
fn put1(&mut self, _: u8);
/// Add 2 bytes to the code section.
fn put2(&mut self, _: u16);
/// Add 4 bytes to the code section.
fn put4(&mut self, _: u32);
/// Add 8 bytes to the code section.
fn put8(&mut self, _: u64);
/// Add a relocation referencing an EBB at the current offset.
fn reloc_ebb(&mut self, _: Reloc, _: CodeOffset);
/// Add a relocation referencing an external symbol plus the addend at the current offset.
fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend);
/// Add a relocation referencing a jump table.
fn reloc_jt(&mut self, _: Reloc, _: JumpTable);
/// Add trap information for the current offset.
fn trap(&mut self, _: TrapCode, _: SourceLoc);
/// Code output is complete, read-only data may follow.
fn begin_rodata(&mut self);
}
/// Report a bad encoding error.
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
panic!(
"Bad encoding {} for {}",
func.encodings[inst],
func.dfg.display_inst(inst, None)
);
}
/// Emit a function to `sink`, given an instruction emitter function.
///
/// This function is called from the `TargetIsa::emit_function()` implementations with the
/// appropriate instruction emitter.
pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS)
where
CS: CodeSink,
EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS),
{
let mut divert = RegDiversions::new();
for ebb in func.layout.ebbs() {
divert.clear();
debug_assert_eq!(func.offsets[ebb], sink.offset());
for inst in func.layout.ebb_insts(ebb) {
emit_inst(func, inst, &mut divert, sink);
}
}
sink.begin_rodata();
// output jump tables
for (jt, jt_data) in func.jump_tables.iter() {
let jt_offset = func.jt_offsets[jt];
for ebb in jt_data.iter() {
let rel_offset: i32 = func.offsets[*ebb] as i32 - jt_offset as i32;
sink.put4(rel_offset as u32)
}
}
}
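For contrast with `MemoryCodeSink`, here is a rough sketch of a text-producing `CodeSink` in the spirit of the `test binemit` driver mentioned above; emitting little-endian bytes and discarding relocations and traps are simplifying assumptions:

```rust
struct HexSink {
    offset: CodeOffset,
    text: String,
}

impl CodeSink for HexSink {
    fn offset(&self) -> CodeOffset {
        self.offset
    }
    fn put1(&mut self, x: u8) {
        self.text.push_str(&format!("{:02x}", x));
        self.offset += 1;
    }
    fn put2(&mut self, x: u16) {
        for b in &x.to_le_bytes() {
            self.put1(*b);
        }
    }
    fn put4(&mut self, x: u32) {
        for b in &x.to_le_bytes() {
            self.put1(*b);
        }
    }
    fn put8(&mut self, x: u64) {
        for b in &x.to_le_bytes() {
            self.put1(*b);
        }
    }
    // This sketch keeps only the bytes and drops all side-channel data.
    fn reloc_ebb(&mut self, _: Reloc, _: CodeOffset) {}
    fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend) {}
    fn reloc_jt(&mut self, _: Reloc, _: JumpTable) {}
    fn trap(&mut self, _: TrapCode, _: SourceLoc) {}
    fn begin_rodata(&mut self) {}
}
```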

View File

@@ -0,0 +1,216 @@
//! Branch relaxation and offset computation.
//!
//! # EBB header offsets
//!
//! Before we can generate binary machine code for branch instructions, we need to know the final
//! offsets of all the EBB headers in the function. This information is encoded in the
//! `func.offsets` table.
//!
//! # Branch relaxation
//!
//! Branch relaxation is the process of ensuring that all branches in the function have enough
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
//!
//! On RISC architectures, it can happen that conditional branches have a shorter range than
//! unconditional branches:
//!
//! ```clif
//! brz v1, ebb17
//! ```
//!
//! can be transformed into:
//!
//! ```clif
//! brnz v1, ebb23
//! jump ebb17
//! ebb23:
//! ```
use crate::binemit::CodeOffset;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{Function, InstructionData, Opcode};
use crate::isa::{EncInfo, TargetIsa};
use crate::iterators::IteratorExtras;
use crate::regalloc::RegDiversions;
use crate::timing;
use crate::CodegenResult;
use log::debug;
/// Relax branches and compute the final layout of EBB headers in `func`.
///
/// Fill in the `func.offsets` table so the function is ready for binary emission.
pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> CodegenResult<CodeOffset> {
let _tt = timing::relax_branches();
let encinfo = isa.encoding_info();
// Clear all offsets so we can recognize EBBs that haven't been visited yet.
func.offsets.clear();
func.offsets.resize(func.dfg.num_ebbs());
// Start by converting eligible jumps into fallthrough instructions.
fallthroughs(func);
let mut offset = 0;
let mut divert = RegDiversions::new();
// The relaxation algorithm iterates to convergence.
let mut go_again = true;
while go_again {
go_again = false;
offset = 0;
// Visit all instructions in layout order
let mut cur = FuncCursor::new(func);
while let Some(ebb) = cur.next_ebb() {
divert.clear();
// Record the offset for `ebb` and make sure we iterate until offsets are stable.
if cur.func.offsets[ebb] != offset {
debug_assert!(
cur.func.offsets[ebb] < offset,
"Code shrinking during relaxation"
);
cur.func.offsets[ebb] = offset;
go_again = true;
}
while let Some(inst) = cur.next_inst() {
divert.apply(&cur.func.dfg[inst]);
let enc = cur.func.encodings[inst];
// See if this might be a branch that is out of range.
if let Some(range) = encinfo.branch_range(enc) {
if let Some(dest) = cur.func.dfg[inst].branch_destination() {
let dest_offset = cur.func.offsets[dest];
// This could be an out-of-range branch.
// Relax it unless the destination offset has not been computed yet.
if !range.contains(offset, dest_offset)
&& (dest_offset != 0 || Some(dest) == cur.func.layout.entry_block())
{
offset +=
relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
continue;
}
}
}
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
}
}
}
for (jt, jt_data) in func.jump_tables.iter() {
func.jt_offsets[jt] = offset;
// TODO: this should be computed based on the min size needed to hold
// the furthest branch.
offset += jt_data.len() as u32 * 4;
}
Ok(offset)
}
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
/// existing `fallthrough` instructions are correct.
fn fallthroughs(func: &mut Function) {
for (ebb, succ) in func.layout.ebbs().adjacent_pairs() {
let term = func.layout.last_inst(ebb).expect("EBB has no terminator.");
if let InstructionData::Jump {
ref mut opcode,
destination,
..
} = func.dfg[term]
{
match *opcode {
Opcode::Fallthrough => {
// Somebody used a fall-through instruction before the branch relaxation pass.
// Make sure it is correct, i.e. the destination is the layout successor.
debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
}
Opcode::Jump => {
// If this is a jump to the successor EBB, change it to a fall-through.
if destination == succ {
*opcode = Opcode::Fallthrough;
func.encodings[term] = Default::default();
}
}
_ => {}
}
}
}
}
/// Relax the branch instruction at `cur` so it can cover the range from `offset` to `dest_offset`.
///
/// Return the size of the replacement instructions up to and including the location where `cur` is
/// left.
fn relax_branch(
cur: &mut FuncCursor,
divert: &RegDiversions,
offset: CodeOffset,
dest_offset: CodeOffset,
encinfo: &EncInfo,
isa: &TargetIsa,
) -> CodeOffset {
let inst = cur.current_inst().unwrap();
debug!(
"Relaxing [{}] {} for {:#x}-{:#x} range",
encinfo.display(cur.func.encodings[inst]),
cur.func.dfg.display_inst(inst, isa),
offset,
dest_offset
);
// Pick the first encoding that can handle the branch range.
let dfg = &cur.func.dfg;
let ctrl_type = dfg.ctrl_typevar(inst);
if let Some(enc) = isa
.legal_encodings(cur.func, &dfg[inst], ctrl_type)
.find(|&enc| {
let range = encinfo.branch_range(enc).expect("Branch with no range");
if !range.contains(offset, dest_offset) {
debug!(" trying [{}]: out of range", encinfo.display(enc));
false
} else if encinfo.operand_constraints(enc)
!= encinfo.operand_constraints(cur.func.encodings[inst])
{
// Conservatively give up if the encoding has different constraints
// than the original, so that we don't risk picking a new encoding
// which the existing operands don't satisfy. We can't check for
// validity directly because we don't have a RegDiversions active so
// we don't know which registers are actually in use.
debug!(" trying [{}]: constraints differ", encinfo.display(enc));
false
} else {
debug!(" trying [{}]: OK", encinfo.display(enc));
true
}
})
{
cur.func.encodings[inst] = enc;
return encinfo.byte_size(enc, inst, &divert, &cur.func);
}
// Note: On some RISC ISAs, conditional branches have shorter range than unconditional
// branches, so one way of extending the range of a conditional branch is to invert its
// condition and make it branch over an unconditional jump which has the larger range.
//
// Splitting the EBB is problematic this late because there may be register diversions in
// effect across the conditional branch, and they can't survive the control flow edge to a new
// EBB. We have two options for handling that:
//
// 1. Set a flag on the new EBB that indicates it wants to preserve the register diversions of
// its layout predecessor, or
// 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the EBB.
//
// It seems that 1. would allow us to share code among RISC ISAs that need this.
//
// We can't allow register diversions to survive from the layout predecessor because the layout
// predecessor could contain kill points for some values that are live in this EBB, and
// diversions are not automatically cancelled when the live range of a value ends.
// This assumes solution 2. above:
panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
}

View File

@@ -0,0 +1,70 @@
//! Instruction shrinking.
//!
//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially
//! chooses the largest one, because this typically provides the register allocator the most
//! flexibility. However, once register allocation is done, this is no longer important, and we
//! can switch to smaller encodings when possible.
use crate::ir::instructions::InstructionData;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::RegDiversions;
use crate::timing;
use log::debug;
/// Pick the smallest valid encodings for instructions.
pub fn shrink_instructions(func: &mut Function, isa: &TargetIsa) {
let _tt = timing::shrink_instructions();
let encinfo = isa.encoding_info();
let mut divert = RegDiversions::new();
for ebb in func.layout.ebbs() {
divert.clear();
for inst in func.layout.ebb_insts(ebb) {
let enc = func.encodings[inst];
if enc.is_legal() {
// regmove/regfill/regspill are special instructions with register immediates
// that are represented as normal operands, so the normal predicates below don't
// handle them correctly.
//
// Also, they need to be presented to the `RegDiversions` to update the
// location tracking.
//
// TODO: Eventually, we want the register allocator to avoid leaving these special
// instructions behind, but for now, just temporarily avoid trying to shrink them.
match func.dfg[inst] {
InstructionData::RegMove { .. }
| InstructionData::RegFill { .. }
| InstructionData::RegSpill { .. } => {
divert.apply(&func.dfg[inst]);
continue;
}
_ => (),
}
let ctrl_type = func.dfg.ctrl_typevar(inst);
// Pick the smallest encoding whose constraints are satisfied.
let best_enc = isa
.legal_encodings(func, &func.dfg[inst], ctrl_type)
.filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func))
.min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func))
.unwrap();
if best_enc != enc {
func.encodings[inst] = best_enc;
debug!(
"Shrunk [{}] to [{}] in {}, reducing the size from {} to {}",
encinfo.display(enc),
encinfo.display(best_enc),
func.dfg.display_inst(inst, isa),
encinfo.byte_size(enc, inst, &divert, &func),
encinfo.byte_size(best_enc, inst, &divert, &func)
);
}
}
}
}
}

View File

@@ -0,0 +1,161 @@
//! Small Bitset
//!
//! This module defines a struct `BitSet<T>` encapsulating a bitset built over the type `T`.
//! `T` is intended to be a primitive unsigned integer type; currently it can be any type from
//! `u8` to `u32`.
//!
//! If you would like to add support for larger bitsets in the future, you need to change the trait
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.
use core::convert::{From, Into};
use core::mem::size_of;
use core::ops::{Add, BitOr, Shl, Sub};
/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BitSet<T>(pub T);
impl<T> BitSet<T>
where
T: Into<u32>
+ From<u8>
+ BitOr<T, Output = T>
+ Shl<u8, Output = T>
+ Sub<T, Output = T>
+ Add<T, Output = T>
+ PartialEq
+ Copy,
{
/// Maximum number of bits supported by this BitSet instance
pub fn bits() -> usize {
size_of::<T>() * 8
}
/// Maximum number of bits supported by any bitset instance at the moment.
pub fn max_bits() -> usize {
size_of::<u32>() * 8
}
/// Check if this BitSet contains the number num
pub fn contains(&self, num: u8) -> bool {
debug_assert!((num as usize) < Self::bits());
debug_assert!((num as usize) < Self::max_bits());
self.0.into() & (1 << num) != 0
}
/// Return the smallest number contained in the bitset or None if empty
pub fn min(&self) -> Option<u8> {
if self.0.into() == 0 {
None
} else {
Some(self.0.into().trailing_zeros() as u8)
}
}
/// Return the largest number contained in the bitset or None if empty
pub fn max(&self) -> Option<u8> {
if self.0.into() == 0 {
None
} else {
let leading_zeroes = self.0.into().leading_zeros() as usize;
Some((Self::max_bits() - leading_zeroes - 1) as u8)
}
}
/// Construct a BitSet with the half-open range [lo,hi) filled in
pub fn from_range(lo: u8, hi: u8) -> Self {
debug_assert!(lo <= hi);
debug_assert!((hi as usize) <= Self::bits());
let one: T = T::from(1);
// I can't just do (one << hi) - one here as the shift may overflow
let hi_rng = if hi >= 1 {
(one << (hi - 1)) + ((one << (hi - 1)) - one)
} else {
T::from(0)
};
let lo_rng = (one << lo) - one;
BitSet(hi_rng - lo_rng)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn contains() {
let s = BitSet::<u8>(255);
for i in 0..8 {
assert!(s.contains(i));
}
let s1 = BitSet::<u8>(0);
for i in 0..8 {
assert!(!s1.contains(i));
}
let s2 = BitSet::<u8>(127);
for i in 0..7 {
assert!(s2.contains(i));
}
assert!(!s2.contains(7));
let s3 = BitSet::<u8>(2 | 4 | 64);
assert!(!s3.contains(0) && !s3.contains(3) && !s3.contains(4));
assert!(!s3.contains(5) && !s3.contains(7));
assert!(s3.contains(1) && s3.contains(2) && s3.contains(6));
let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
assert!(
!s4.contains(0)
&& !s4.contains(1)
&& !s4.contains(4)
&& !s4.contains(5)
&& !s4.contains(6)
&& !s4.contains(7)
&& !s4.contains(9)
&& !s4.contains(11)
);
assert!(s4.contains(2) && s4.contains(3) && s4.contains(8) && s4.contains(10));
}
#[test]
fn minmax() {
let s = BitSet::<u8>(255);
assert_eq!(s.min(), Some(0));
assert_eq!(s.max(), Some(7));
let s1 = BitSet::<u8>(0);
assert!(s1.min() == None && s1.max() == None);
let s2 = BitSet::<u8>(127);
assert!(s2.min() == Some(0) && s2.max() == Some(6));
let s3 = BitSet::<u8>(2 | 4 | 64);
assert!(s3.min() == Some(1) && s3.max() == Some(6));
let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
assert!(s4.min() == Some(2) && s4.max() == Some(10));
}
#[test]
fn from_range() {
let s = BitSet::<u8>::from_range(5, 5);
assert!(s.0 == 0);
let s = BitSet::<u8>::from_range(0, 8);
assert!(s.0 == 255);
let s = BitSet::<u16>::from_range(0, 8);
assert!(s.0 == 255u16);
let s = BitSet::<u16>::from_range(0, 16);
assert!(s.0 == 65535u16);
let s = BitSet::<u8>::from_range(5, 6);
assert!(s.0 == 32u8);
let s = BitSet::<u8>::from_range(3, 7);
assert!(s.0 == 8 | 16 | 32 | 64);
let s = BitSet::<u16>::from_range(5, 11);
assert!(s.0 == 32 | 64 | 128 | 256 | 512 | 1024);
}
}
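One worked example of the overflow-avoiding arithmetic in `from_range`: for `T = u8` and `hi = 8`, the naive `(one << hi) - one` would shift past the width of `u8`, while the rewritten form stays in range:

```rust
#[test]
fn from_range_top_bit() {
    // hi_rng = (1 << 7) + ((1 << 7) - 1) = 128 + 127 = 255; no 8-bit shift needed.
    // lo_rng = (1 << 3) - 1 = 7, so [3, 8) maps to 255 - 7 = 0b1111_1000.
    assert_eq!(BitSet::<u8>::from_range(3, 8).0, 0b1111_1000u8);
}
```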

View File

@@ -0,0 +1,79 @@
//! The `CFGPrinter` utility.
use core::fmt::{Display, Formatter, Result, Write};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::instructions::BranchInfo;
use crate::ir::Function;
/// A utility for pretty-printing the CFG of a `Function`.
pub struct CFGPrinter<'a> {
func: &'a Function,
cfg: ControlFlowGraph,
}
impl<'a> CFGPrinter<'a> {
/// Create a new CFGPrinter.
pub fn new(func: &'a Function) -> Self {
Self {
func,
cfg: ControlFlowGraph::with_function(func),
}
}
/// Write the CFG for this function to `w`.
pub fn write(&self, w: &mut Write) -> Result {
self.header(w)?;
self.ebb_nodes(w)?;
self.cfg_connections(w)?;
writeln!(w, "}}")
}
fn header(&self, w: &mut Write) -> Result {
writeln!(w, "digraph \"{}\" {{", self.func.name)?;
if let Some(entry) = self.func.layout.entry_block() {
writeln!(w, " {{rank=min; {}}}", entry)?;
}
Ok(())
}
fn ebb_nodes(&self, w: &mut Write) -> Result {
for ebb in &self.func.layout {
write!(w, " {} [shape=record, label=\"{{{}", ebb, ebb)?;
// Add all outgoing branch instructions to the label.
for inst in self.func.layout.ebb_insts(ebb) {
let idata = &self.func.dfg[inst];
match idata.analyze_branch(&self.func.dfg.value_lists) {
BranchInfo::SingleDest(dest, _) => {
write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)?
}
BranchInfo::Table(table, dest) => {
write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?;
if let Some(dest) = dest {
write!(w, " {}", dest)?
}
}
BranchInfo::NotABranch => {}
}
}
writeln!(w, "}}\"]")?
}
Ok(())
}
fn cfg_connections(&self, w: &mut Write) -> Result {
for ebb in &self.func.layout {
for BasicBlock { ebb: parent, inst } in self.cfg.pred_iter(ebb) {
writeln!(w, " {}:{} -> {}", parent, inst, ebb)?;
}
}
Ok(())
}
}
impl<'a> Display for CFGPrinter<'a> {
fn fmt(&self, f: &mut Formatter) -> Result {
self.write(f)
}
}
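A usage sketch: thanks to the `Display` impl, dumping a function's CFG as Graphviz DOT text is a single `write!` (the `func` argument is assumed to be an existing `Function`):

```rust
use core::fmt::Write;

fn dump_cfg(func: &Function) -> String {
    let mut dot = String::new();
    // `CFGPrinter` computes the CFG itself and renders a `digraph`.
    write!(dot, "{}", CFGPrinter::new(func)).unwrap();
    dot
}
```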

View File

@@ -0,0 +1,78 @@
//! Runtime support for precomputed constant hash tables.
//!
//! The `cranelift-codegen/meta-python/constant_hash.py` Python module can generate constant hash tables
//! using open addressing and quadratic probing. The hash tables are arrays that are guaranteed to:
//!
//! - Have a power-of-two size.
//! - Contain at least one empty slot.
//!
//! This module provides runtime support for lookups in these tables.
/// Trait that must be implemented by the entries in a constant hash table.
pub trait Table<K: Copy + Eq> {
/// Get the number of entries in this table, which must be a power of two.
fn len(&self) -> usize;
/// Get the key corresponding to the entry at `idx`, or `None` if the entry is empty.
/// The `idx` must be in range.
fn key(&self, idx: usize) -> Option<K>;
}
/// Look for `key` in `table`.
///
/// The provided `hash` value must have been computed from `key` using the same hash function that
/// was used to construct the table.
///
/// Returns `Ok(idx)` with the table index containing the found entry, or `Err(idx)` with the empty
/// sentinel entry if no entry could be found.
pub fn probe<K: Copy + Eq, T: Table<K> + ?Sized>(
table: &T,
key: K,
hash: usize,
) -> Result<usize, usize> {
debug_assert!(table.len().is_power_of_two());
let mask = table.len() - 1;
let mut idx = hash;
let mut step = 0;
loop {
idx &= mask;
match table.key(idx) {
None => return Err(idx),
Some(k) if k == key => return Ok(idx),
_ => {}
}
// Quadratic probing.
step += 1;
// When `table.len()` is a power of two, it can be proven that `idx` will visit all
// entries. This means that this loop will always terminate if the hash table has even
// one unused entry.
debug_assert!(step < table.len());
idx += step;
}
}
/// A primitive hash function for matching opcodes.
/// Must match `cranelift-codegen/meta-python/constant_hash.py` and `cranelift-codegen/meta/constant_hash.rs`.
pub fn simple_hash(s: &str) -> usize {
let mut h: u32 = 5381;
for c in s.chars() {
h = (h ^ c as u32).wrapping_add(h.rotate_right(6));
}
h as usize
}
#[cfg(test)]
mod tests {
use super::simple_hash;
#[test]
fn basic() {
// cf. `meta-python/constant_hash.py` tests.
assert_eq!(simple_hash("Hello"), 0x2fa70c01);
assert_eq!(simple_hash("world"), 0x5b0c31d5);
}
}
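A sketch of the lookup side: any array of optional keys can implement `Table`. The slot placement below is arbitrary, so this particular probe may legitimately return `Err`; tables emitted by the Python generator always place each key on its own probe path:

```rust
struct StrTable<'a>(&'a [Option<&'a str>]);

impl<'a> Table<&'a str> for StrTable<'a> {
    fn len(&self) -> usize {
        self.0.len()
    }
    fn key(&self, idx: usize) -> Option<&'a str> {
        self.0[idx]
    }
}

fn lookup(name: &str) -> Option<usize> {
    // Power-of-two size with at least one empty slot, as required above.
    let table = StrTable(&[Some("iadd"), None, Some("isub"), None]);
    probe(&table, name, simple_hash(name)).ok()
}
```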

View File

@@ -0,0 +1,327 @@
//! Cranelift compilation context and main entry point.
//!
//! When compiling many small functions, it is important to avoid repeatedly allocating and
//! deallocating the data structures needed for compilation. The `Context` struct is used to hold
//! on to memory allocations between function compilations.
//!
//! The context does not hold a `TargetIsa` instance which has to be provided as an argument
//! instead. This is because an ISA instance is immutable and can be used by multiple compilation
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
//! single ISA instance.
use crate::binemit::{
relax_branches, shrink_instructions, CodeOffset, MemoryCodeSink, RelocSink, TrapSink,
};
use crate::dce::do_dce;
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::legalize_function;
use crate::licm::do_licm;
use crate::loop_analysis::LoopAnalysis;
use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings::{FlagsOrIsa, OptLevel};
use crate::simple_gvn::do_simple_gvn;
use crate::simple_preopt::do_preopt;
use crate::timing;
use crate::unreachable_code::eliminate_unreachable_code;
use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult};
use std::vec::Vec;
/// Persistent data structures and compilation pipeline.
pub struct Context {
/// The function we're compiling.
pub func: Function,
/// The control flow graph of `func`.
pub cfg: ControlFlowGraph,
/// Dominator tree for `func`.
pub domtree: DominatorTree,
/// Register allocation context.
pub regalloc: regalloc::Context,
/// Loop analysis of `func`.
pub loop_analysis: LoopAnalysis,
}
impl Context {
/// Allocate a new compilation context.
///
/// The returned instance should be reused for compiling multiple functions in order to avoid
/// needless allocator thrashing.
pub fn new() -> Self {
Self::for_function(Function::new())
}
/// Allocate a new compilation context with an existing Function.
///
/// The returned instance should be reused for compiling multiple functions in order to avoid
/// needless allocator thrashing.
pub fn for_function(func: Function) -> Self {
Self {
func,
cfg: ControlFlowGraph::new(),
domtree: DominatorTree::new(),
regalloc: regalloc::Context::new(),
loop_analysis: LoopAnalysis::new(),
}
}
/// Clear all data structures in this context.
pub fn clear(&mut self) {
self.func.clear();
self.cfg.clear();
self.domtree.clear();
self.regalloc.clear();
self.loop_analysis.clear();
}
/// Compile the function, and emit machine code into a `Vec<u8>`.
///
/// Run the function through all the passes necessary to generate code for the target ISA
/// represented by `isa`, as well as the final step of emitting machine code into a
/// `Vec<u8>`. The machine code is not relocated. Instead, any relocations are emitted
/// into `relocs`.
///
/// This function calls `compile` and `emit_to_memory`, taking care to resize `mem` as
/// needed, so it provides a safe interface.
pub fn compile_and_emit(
&mut self,
isa: &TargetIsa,
mem: &mut Vec<u8>,
relocs: &mut RelocSink,
traps: &mut TrapSink,
) -> CodegenResult<()> {
let code_size = self.compile(isa)?;
let old_len = mem.len();
mem.resize(old_len + code_size as usize, 0);
unsafe { self.emit_to_memory(isa, mem.as_mut_ptr().add(old_len), relocs, traps) };
Ok(())
}
/// Compile the function.
///
/// Run the function through all the passes necessary to generate code for the target ISA
/// represented by `isa`. This does not include the final step of emitting machine code into a
/// code sink.
///
/// Returns the size of the function's code.
pub fn compile(&mut self, isa: &TargetIsa) -> CodegenResult<CodeOffset> {
let _tt = timing::compile();
self.verify_if(isa)?;
self.compute_cfg();
if isa.flags().opt_level() != OptLevel::Fastest {
self.preopt(isa)?;
}
if isa.flags().enable_nan_canonicalization() {
self.canonicalize_nans(isa)?;
}
self.legalize(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.postopt(isa)?;
}
if isa.flags().opt_level() == OptLevel::Best {
self.compute_domtree();
self.compute_loop_analysis();
self.licm(isa)?;
self.simple_gvn(isa)?;
}
self.compute_domtree();
self.eliminate_unreachable_code(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.dce(isa)?;
}
self.regalloc(isa)?;
self.prologue_epilogue(isa)?;
if isa.flags().opt_level() == OptLevel::Best {
self.shrink_instructions(isa)?;
}
self.relax_branches(isa)
}
/// Emit machine code directly into raw memory.
///
/// Write all of the function's machine code to the memory at `mem`. The size of the machine
/// code is returned by `compile` above.
///
/// The machine code is not relocated. Instead, any relocations are emitted into `relocs`.
///
/// This function is unsafe since it does not perform bounds checking on the memory buffer,
/// and it can't guarantee that the `mem` pointer is valid.
pub unsafe fn emit_to_memory(
&self,
isa: &TargetIsa,
mem: *mut u8,
relocs: &mut RelocSink,
traps: &mut TrapSink,
) {
let _tt = timing::binemit();
isa.emit_function_to_memory(&self.func, &mut MemoryCodeSink::new(mem, relocs, traps));
}
/// Run the verifier on the function.
///
/// Also check that the dominator tree and control flow graph are consistent with the function.
pub fn verify<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> VerifierResult<()> {
let mut errors = VerifierErrors::default();
let _ = verify_context(&self.func, &self.cfg, &self.domtree, fisa, &mut errors);
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
/// Run the verifier only if the `enable_verifier` setting is true.
pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CodegenResult<()> {
let fisa = fisa.into();
if fisa.flags.enable_verifier() {
self.verify(fisa)?;
}
Ok(())
}
/// Run the locations verifier on the function.
pub fn verify_locations(&self, isa: &TargetIsa) -> VerifierResult<()> {
let mut errors = VerifierErrors::default();
let _ = verify_locations(isa, &self.func, None, &mut errors);
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
/// Run the locations verifier only if the `enable_verifier` setting is true.
pub fn verify_locations_if(&self, isa: &TargetIsa) -> CodegenResult<()> {
if isa.flags().enable_verifier() {
self.verify_locations(isa)?;
}
Ok(())
}
/// Perform dead-code elimination on the function.
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
do_dce(&mut self.func, &mut self.domtree);
self.verify_if(fisa)?;
Ok(())
}
/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_preopt(&mut self.func);
self.verify_if(isa)?;
Ok(())
}
/// Perform NaN canonicalizing rewrites on the function.
pub fn canonicalize_nans(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_nan_canonicalization(&mut self.func);
self.verify_if(isa)
}
/// Run the legalizer for `isa` on the function.
pub fn legalize(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
// Legalization invalidates the domtree and loop_analysis by mutating the CFG.
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
self.domtree.clear();
self.loop_analysis.clear();
legalize_function(&mut self.func, &mut self.cfg, isa);
self.verify_if(isa)
}
/// Perform post-legalization rewrites on the function.
pub fn postopt(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_postopt(&mut self.func, isa);
self.verify_if(isa)?;
Ok(())
}
/// Compute the control flow graph.
pub fn compute_cfg(&mut self) {
self.cfg.compute(&self.func)
}
/// Compute dominator tree.
pub fn compute_domtree(&mut self) {
self.domtree.compute(&self.func, &self.cfg)
}
/// Compute the loop analysis.
pub fn compute_loop_analysis(&mut self) {
self.loop_analysis
.compute(&self.func, &self.cfg, &self.domtree)
}
/// Compute the control flow graph and dominator tree.
pub fn flowgraph(&mut self) {
self.compute_cfg();
self.compute_domtree()
}
/// Perform simple GVN on the function.
pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
do_simple_gvn(&mut self.func, &mut self.domtree);
self.verify_if(fisa)
}
/// Perform LICM on the function.
pub fn licm(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_licm(
isa,
&mut self.func,
&mut self.cfg,
&mut self.domtree,
&mut self.loop_analysis,
);
self.verify_if(isa)
}
/// Perform unreachable code elimination.
pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()>
where
FOI: Into<FlagsOrIsa<'a>>,
{
eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree);
self.verify_if(fisa)
}
/// Run the register allocator.
pub fn regalloc(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
self.regalloc
.run(isa, &mut self.func, &self.cfg, &mut self.domtree)
}
/// Insert prologue and epilogues after computing the stack frame layout.
pub fn prologue_epilogue(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
isa.prologue_epilogue(&mut self.func)?;
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(())
}
/// Run the instruction shrinking pass.
pub fn shrink_instructions(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
shrink_instructions(&mut self.func, isa);
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(())
}
/// Run the branch relaxation pass and return the final code size.
pub fn relax_branches(&mut self, isa: &TargetIsa) -> CodegenResult<CodeOffset> {
let code_size = relax_branches(&mut self.func, isa)?;
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(code_size)
}
}
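A sketch of driving the pipeline end to end; `isa` and `func` are assumed to exist, `CollectRelocs` stands in for any `RelocSink` implementation, and `NullTrapSink` would need to be imported from `binemit`:

```rust
fn compile_one(isa: &TargetIsa, func: Function) -> CodegenResult<Vec<u8>> {
    let mut ctx = Context::for_function(func);
    let mut code = Vec::new();
    let mut relocs = CollectRelocs(Vec::new()); // hypothetical RelocSink impl
    let mut traps = NullTrapSink {};
    // Runs every pass in `compile`, then resizes `code` and emits into it.
    ctx.compile_and_emit(isa, &mut code, &mut relocs, &mut traps)?;
    Ok(code)
}
```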

View File

@@ -0,0 +1,765 @@
//! Cursor library.
//!
//! This module defines cursor data types that can be used for inserting instructions.
use crate::ir;
use crate::isa::TargetIsa;
/// The possible positions of a cursor.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum CursorPosition {
/// Cursor is not pointing anywhere. No instructions can be inserted.
Nowhere,
/// Cursor is pointing at an existing instruction.
/// New instructions will be inserted *before* the current instruction.
At(ir::Inst),
/// Cursor is before the beginning of an EBB. No instructions can be inserted. Calling
/// `next_inst()` will move to the first instruction in the EBB.
Before(ir::Ebb),
/// Cursor is pointing after the end of an EBB.
/// New instructions will be appended to the EBB.
After(ir::Ebb),
}
/// All cursor types implement the `Cursor` trait, which provides common navigation operations.
pub trait Cursor {
/// Get the current cursor position.
fn position(&self) -> CursorPosition;
/// Set the current position.
fn set_position(&mut self, pos: CursorPosition);
/// Get the source location that should be assigned to new instructions.
fn srcloc(&self) -> ir::SourceLoc;
/// Set the source location that should be assigned to new instructions.
fn set_srcloc(&mut self, srcloc: ir::SourceLoc);
/// Borrow a reference to the function layout that this cursor is navigating.
fn layout(&self) -> &ir::Layout;
/// Borrow a mutable reference to the function layout that this cursor is navigating.
fn layout_mut(&mut self) -> &mut ir::Layout;
/// Exchange this cursor for one with a set source location.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, SourceLoc};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, srcloc: SourceLoc) {
/// let mut pos = FuncCursor::new(func).with_srcloc(srcloc);
///
/// // Use `pos`...
/// }
/// ```
fn with_srcloc(mut self, srcloc: ir::SourceLoc) -> Self
where
Self: Sized,
{
self.set_srcloc(srcloc);
self
}
/// Rebuild this cursor positioned at `pos`.
fn at_position(mut self, pos: CursorPosition) -> Self
where
Self: Sized,
{
self.set_position(pos);
self
}
/// Rebuild this cursor positioned at `inst`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, inst: Inst) {
/// let mut pos = FuncCursor::new(func).at_inst(inst);
///
/// // Use `pos`...
/// }
/// ```
fn at_inst(mut self, inst: ir::Inst) -> Self
where
Self: Sized,
{
self.goto_inst(inst);
self
}
/// Rebuild this cursor positioned at the first insertion point for `ebb`.
/// This differs from `at_first_inst` in that it doesn't assume that any
/// instructions have been inserted into `ebb` yet.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_first_insertion_point(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_first_insertion_point(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_first_insertion_point(ebb);
self
}
/// Rebuild this cursor positioned at the first instruction in `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_first_inst(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_first_inst(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_first_inst(ebb);
self
}
/// Rebuild this cursor positioned at the last instruction in `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_last_inst(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_last_inst(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_last_inst(ebb);
self
}
/// Rebuild this cursor positioned after `inst`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, inst: Inst) {
/// let mut pos = FuncCursor::new(func).after_inst(inst);
///
/// // Use `pos`...
/// }
/// ```
fn after_inst(mut self, inst: ir::Inst) -> Self
where
Self: Sized,
{
self.goto_after_inst(inst);
self
}
/// Rebuild this cursor positioned at the top of `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_top(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_top(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_top(ebb);
self
}
/// Rebuild this cursor positioned at the bottom of `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_bottom(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_bottom(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_bottom(ebb);
self
}
/// Get the EBB corresponding to the current position.
fn current_ebb(&self) -> Option<ir::Ebb> {
use self::CursorPosition::*;
match self.position() {
Nowhere => None,
At(inst) => self.layout().inst_ebb(inst),
Before(ebb) | After(ebb) => Some(ebb),
}
}
/// Get the instruction corresponding to the current position, if any.
fn current_inst(&self) -> Option<ir::Inst> {
use self::CursorPosition::*;
match self.position() {
At(inst) => Some(inst),
_ => None,
}
}
/// Go to the position after a specific instruction, which must be inserted
/// in the layout. New instructions will be inserted after `inst`.
fn goto_after_inst(&mut self, inst: ir::Inst) {
debug_assert!(self.layout().inst_ebb(inst).is_some());
let new_pos = if let Some(next) = self.layout().next_inst(inst) {
CursorPosition::At(next)
} else {
CursorPosition::After(
self.layout()
.inst_ebb(inst)
.expect("current instruction removed?"),
)
};
self.set_position(new_pos);
}
/// Go to a specific instruction which must be inserted in the layout.
/// New instructions will be inserted before `inst`.
fn goto_inst(&mut self, inst: ir::Inst) {
debug_assert!(self.layout().inst_ebb(inst).is_some());
self.set_position(CursorPosition::At(inst));
}
/// Go to the position for inserting instructions at the beginning of `ebb`,
/// which unlike `goto_first_inst` doesn't assume that any instructions have
/// been inserted into `ebb` yet.
fn goto_first_insertion_point(&mut self, ebb: ir::Ebb) {
if let Some(inst) = self.layout().first_inst(ebb) {
self.goto_inst(inst);
} else {
self.goto_bottom(ebb);
}
}
/// Go to the first instruction in `ebb`.
fn goto_first_inst(&mut self, ebb: ir::Ebb) {
let inst = self.layout().first_inst(ebb).expect("Empty EBB");
self.goto_inst(inst);
}
/// Go to the last instruction in `ebb`.
fn goto_last_inst(&mut self, ebb: ir::Ebb) {
let inst = self.layout().last_inst(ebb).expect("Empty EBB");
self.goto_inst(inst);
}
/// Go to the top of `ebb` which must be inserted into the layout.
/// At this position, instructions cannot be inserted, but `next_inst()` will move to the first
/// instruction in `ebb`.
fn goto_top(&mut self, ebb: ir::Ebb) {
debug_assert!(self.layout().is_ebb_inserted(ebb));
self.set_position(CursorPosition::Before(ebb));
}
/// Go to the bottom of `ebb` which must be inserted into the layout.
/// At this position, inserted instructions will be appended to `ebb`.
fn goto_bottom(&mut self, ebb: ir::Ebb) {
debug_assert!(self.layout().is_ebb_inserted(ebb));
self.set_position(CursorPosition::After(ebb));
}
/// Go to the top of the next EBB in layout order and return it.
///
/// - If the cursor wasn't pointing at anything, go to the top of the first EBB in the
/// function.
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
///
/// # Examples
///
/// The `next_ebb()` method is intended for iterating over the EBBs in layout order:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function) {
/// let mut cursor = FuncCursor::new(func);
/// while let Some(ebb) = cursor.next_ebb() {
/// // Edit ebb.
/// }
/// }
/// ```
fn next_ebb(&mut self) -> Option<ir::Ebb> {
let next = if let Some(ebb) = self.current_ebb() {
self.layout().next_ebb(ebb)
} else {
self.layout().entry_block()
};
self.set_position(match next {
Some(ebb) => CursorPosition::Before(ebb),
None => CursorPosition::Nowhere,
});
next
}
/// Go to the bottom of the previous EBB in layout order and return it.
///
/// - If the cursor wasn't pointing at anything, go to the bottom of the last EBB in the
/// function.
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
///
/// # Examples
///
/// The `prev_ebb()` method is intended for iterating over the EBBs in backwards layout order:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function) {
/// let mut cursor = FuncCursor::new(func);
/// while let Some(ebb) = cursor.prev_ebb() {
/// // Edit ebb.
/// }
/// }
/// ```
fn prev_ebb(&mut self) -> Option<ir::Ebb> {
let prev = if let Some(ebb) = self.current_ebb() {
self.layout().prev_ebb(ebb)
} else {
self.layout().last_ebb()
};
self.set_position(match prev {
Some(ebb) => CursorPosition::After(ebb),
None => CursorPosition::Nowhere,
});
prev
}
/// Move to the next instruction in the same EBB and return it.
///
/// - If the cursor was positioned before an EBB, go to the first instruction in that EBB.
/// - If there are no more instructions in the EBB, go to the `After(ebb)` position and return
/// `None`.
/// - If the cursor wasn't pointing anywhere, keep doing that.
///
/// This method will never move the cursor to a different EBB.
///
/// # Examples
///
/// The `next_inst()` method is intended for iterating over the instructions in an EBB like
/// this:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
/// let mut cursor = FuncCursor::new(func).at_top(ebb);
/// while let Some(inst) = cursor.next_inst() {
/// // Edit instructions...
/// }
/// }
/// ```
/// The loop body can insert and remove instructions via the cursor.
///
/// Iterating over all the instructions in a function looks like this:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function) {
/// let mut cursor = FuncCursor::new(func);
/// while let Some(ebb) = cursor.next_ebb() {
/// while let Some(inst) = cursor.next_inst() {
/// // Edit instructions...
/// }
/// }
/// }
/// ```
fn next_inst(&mut self) -> Option<ir::Inst> {
use self::CursorPosition::*;
match self.position() {
Nowhere | After(..) => None,
At(inst) => {
if let Some(next) = self.layout().next_inst(inst) {
self.set_position(At(next));
Some(next)
} else {
let pos = After(
self.layout()
.inst_ebb(inst)
.expect("current instruction removed?"),
);
self.set_position(pos);
None
}
}
Before(ebb) => {
if let Some(next) = self.layout().first_inst(ebb) {
self.set_position(At(next));
Some(next)
} else {
self.set_position(After(ebb));
None
}
}
}
}
/// Move to the previous instruction in the same EBB and return it.
///
/// - If the cursor was positioned after an EBB, go to the last instruction in that EBB.
/// - If there are no more instructions in the EBB, go to the `Before(ebb)` position and return
/// `None`.
/// - If the cursor wasn't pointing anywhere, keep doing that.
///
/// This method will never move the cursor to a different EBB.
///
/// # Examples
///
/// The `prev_inst()` method is intended for iterating backwards over the instructions in an
/// EBB like this:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
/// let mut cursor = FuncCursor::new(func).at_bottom(ebb);
/// while let Some(inst) = cursor.prev_inst() {
/// // Edit instructions...
/// }
/// }
/// ```
fn prev_inst(&mut self) -> Option<ir::Inst> {
use self::CursorPosition::*;
match self.position() {
Nowhere | Before(..) => None,
At(inst) => {
if let Some(prev) = self.layout().prev_inst(inst) {
self.set_position(At(prev));
Some(prev)
} else {
let pos = Before(
self.layout()
.inst_ebb(inst)
.expect("current instruction removed?"),
);
self.set_position(pos);
None
}
}
After(ebb) => {
if let Some(prev) = self.layout().last_inst(ebb) {
self.set_position(At(prev));
Some(prev)
} else {
self.set_position(Before(ebb));
None
}
}
}
}
/// Insert an instruction at the current position.
///
/// - If pointing at an instruction, the new instruction is inserted before the current
/// instruction.
/// - If pointing at the bottom of an EBB, the new instruction is appended to the EBB.
/// - Otherwise panic.
///
/// In either case, the cursor is not moved, so repeated calls to `insert_inst()` cause
/// instructions to appear in insertion order in the EBB.
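///
/// # Examples
///
/// Most code builds and inserts instructions through a concrete cursor's `ins()` method,
/// which hands the finished instruction to `insert_inst()`. A minimal sketch (the `iconst`
/// is only for illustration):
///
/// ```
/// # use cranelift_codegen::ir::{Ebb, Function, InstBuilder};
/// # use cranelift_codegen::ir::types::I32;
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn append_zero(func: &mut Function, ebb: Ebb) {
///     // At the bottom of `ebb`, the built instruction is appended.
///     let mut cursor = FuncCursor::new(func).at_bottom(ebb);
///     cursor.ins().iconst(I32, 0);
/// }
/// ```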
fn insert_inst(&mut self, inst: ir::Inst) {
use self::CursorPosition::*;
match self.position() {
Nowhere | Before(..) => panic!("Invalid insert_inst position"),
At(cur) => self.layout_mut().insert_inst(inst, cur),
After(ebb) => self.layout_mut().append_inst(inst, ebb),
}
}
/// Remove the instruction under the cursor.
///
/// The cursor is left pointing at the position following the current instruction.
///
/// Return the instruction that was removed.
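///
/// # Examples
///
/// A sketch that deletes every instruction in an EBB by walking it backwards, following the
/// `prev_inst()` iteration pattern:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn clear_ebb(func: &mut Function, ebb: Ebb) {
///     let mut cursor = FuncCursor::new(func).at_bottom(ebb);
///     while let Some(_inst) = cursor.prev_inst() {
///         // The cursor is left after the removed instruction, so the
///         // `prev_inst()` above resumes at the preceding one.
///         cursor.remove_inst();
///     }
/// }
/// ```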
fn remove_inst(&mut self) -> ir::Inst {
let inst = self.current_inst().expect("No instruction to remove");
self.next_inst();
self.layout_mut().remove_inst(inst);
inst
}
/// Remove the instruction under the cursor.
///
/// The cursor is left pointing at the position preceding the current instruction.
///
/// Return the instruction that was removed.
fn remove_inst_and_step_back(&mut self) -> ir::Inst {
let inst = self.current_inst().expect("No instruction to remove");
self.prev_inst();
self.layout_mut().remove_inst(inst);
inst
}
/// Insert an EBB at the current position and switch to it.
///
/// As far as possible, this method behaves as if the EBB header were an instruction inserted
/// at the current position.
///
/// - If the cursor is pointing at an existing instruction, *the current EBB is split in two*
/// and the current instruction becomes the first instruction in the inserted EBB.
/// - If the cursor points at the bottom of an EBB, the new EBB is inserted after the current
/// one, and the cursor is moved to the bottom of the new EBB where instructions can be
/// appended.
/// - If the cursor points to the top of an EBB, the new EBB is inserted above the current one.
/// - If the cursor is not pointing at anything, the new EBB is placed last in the layout.
///
/// This means that it is always valid to call this method, and it always leaves the cursor in
/// a state that will insert instructions into the new EBB.
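///
/// # Examples
///
/// A sketch that appends a fresh EBB to the layout and leaves the cursor ready to insert
/// instructions into it:
///
/// ```
/// # use cranelift_codegen::ir::{Ebb, Function};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn append_ebb(func: &mut Function) -> Ebb {
///     let new_ebb = func.dfg.make_ebb();
///     // With the cursor pointing nowhere, the new EBB is placed last in the
///     // layout and the cursor moves to its bottom, ready for insertions.
///     let mut cursor = FuncCursor::new(func);
///     cursor.insert_ebb(new_ebb);
///     new_ebb
/// }
/// ```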
fn insert_ebb(&mut self, new_ebb: ir::Ebb) {
use self::CursorPosition::*;
match self.position() {
At(inst) => {
self.layout_mut().split_ebb(new_ebb, inst);
// All other cases move to `After(ebb)`, but in this case we'll stay `At(inst)`.
return;
}
Nowhere => self.layout_mut().append_ebb(new_ebb),
Before(ebb) => self.layout_mut().insert_ebb(new_ebb, ebb),
After(ebb) => self.layout_mut().insert_ebb_after(new_ebb, ebb),
}
// For everything but `At(inst)` we end up appending to the new EBB.
self.set_position(After(new_ebb));
}
}
/// Function cursor.
///
/// A `FuncCursor` holds a mutable reference to a whole `ir::Function` while keeping a position
/// too. The function can be re-borrowed by accessing the public `cur.func` member.
///
/// This cursor is for use before legalization. The inserted instructions are not given an
/// encoding.
pub struct FuncCursor<'f> {
pos: CursorPosition,
srcloc: ir::SourceLoc,
/// The referenced function.
pub func: &'f mut ir::Function,
}
impl<'f> FuncCursor<'f> {
/// Create a new `FuncCursor` pointing nowhere.
pub fn new(func: &'f mut ir::Function) -> Self {
Self {
pos: CursorPosition::Nowhere,
srcloc: Default::default(),
func,
}
}
/// Use the source location of `inst` for future instructions.
pub fn use_srcloc(&mut self, inst: ir::Inst) {
self.srcloc = self.func.srclocs[inst];
}
/// Create an instruction builder that inserts an instruction at the current position.
pub fn ins(&mut self) -> ir::InsertBuilder<&mut FuncCursor<'f>> {
ir::InsertBuilder::new(self)
}
}
impl<'f> Cursor for FuncCursor<'f> {
fn position(&self) -> CursorPosition {
self.pos
}
fn set_position(&mut self, pos: CursorPosition) {
self.pos = pos
}
fn srcloc(&self) -> ir::SourceLoc {
self.srcloc
}
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
self.srcloc = srcloc;
}
fn layout(&self) -> &ir::Layout {
&self.func.layout
}
fn layout_mut(&mut self) -> &mut ir::Layout {
&mut self.func.layout
}
}
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
fn data_flow_graph(&self) -> &ir::DataFlowGraph {
&self.func.dfg
}
fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
&mut self.func.dfg
}
fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph {
self.insert_inst(inst);
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}
&mut self.func.dfg
}
}
/// Encoding cursor.
///
/// An `EncCursor` can be used to insert instructions that are immediately assigned an encoding.
/// The cursor holds a mutable reference to the whole function which can be re-borrowed from the
/// public `pos.func` member.
pub struct EncCursor<'f> {
pos: CursorPosition,
srcloc: ir::SourceLoc,
built_inst: Option<ir::Inst>,
/// The referenced function.
pub func: &'f mut ir::Function,
/// The target ISA that will be used to encode instructions.
pub isa: &'f TargetIsa,
}
impl<'f> EncCursor<'f> {
/// Create a new `EncCursor` pointing nowhere.
pub fn new(func: &'f mut ir::Function, isa: &'f TargetIsa) -> Self {
Self {
pos: CursorPosition::Nowhere,
srcloc: Default::default(),
built_inst: None,
func,
isa,
}
}
/// Use the source location of `inst` for future instructions.
pub fn use_srcloc(&mut self, inst: ir::Inst) {
self.srcloc = self.func.srclocs[inst];
}
/// Create an instruction builder that will insert an encoded instruction at the current
/// position.
///
/// The builder will panic if it is used to insert an instruction that can't be encoded for
/// `self.isa`.
pub fn ins(&mut self) -> ir::InsertBuilder<&mut EncCursor<'f>> {
ir::InsertBuilder::new(self)
}
/// Get the last built instruction.
///
/// This returns the last instruction that was built using the `ins()` method on this cursor.
/// Panics if no instruction was built.
pub fn built_inst(&self) -> ir::Inst {
self.built_inst.expect("No instruction was inserted")
}
/// Return an object that can display `inst`.
///
/// This is a convenience wrapper for the DFG equivalent.
pub fn display_inst(&self, inst: ir::Inst) -> ir::dfg::DisplayInst {
self.func.dfg.display_inst(inst, self.isa)
}
}
impl<'f> Cursor for EncCursor<'f> {
fn position(&self) -> CursorPosition {
self.pos
}
fn set_position(&mut self, pos: CursorPosition) {
self.pos = pos
}
fn srcloc(&self) -> ir::SourceLoc {
self.srcloc
}
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
self.srcloc = srcloc;
}
fn layout(&self) -> &ir::Layout {
&self.func.layout
}
fn layout_mut(&mut self) -> &mut ir::Layout {
&mut self.func.layout
}
}
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
fn data_flow_graph(&self) -> &ir::DataFlowGraph {
&self.func.dfg
}
fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
&mut self.func.dfg
}
fn insert_built_inst(
self,
inst: ir::Inst,
ctrl_typevar: ir::Type,
) -> &'c mut ir::DataFlowGraph {
// Insert the instruction and remember the reference.
self.insert_inst(inst);
self.built_inst = Some(inst);
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}
&mut self.func.dfg
}
}

View File

@@ -0,0 +1,28 @@
//! Debug tracing helpers.
use core::fmt;
/// Prefix added to the log file names, just before the thread name or id.
pub static LOG_FILENAME_PREFIX: &str = "cranelift.dbg.";
/// Helper for printing lists.
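///
/// For example, `DisplayList(&[1, 2, 3])` formats as `[1, 2, 3]`, and an empty slice
/// formats as `[]`.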
pub struct DisplayList<'a, T>(pub &'a [T])
where
T: 'a + fmt::Display;
impl<'a, T> fmt::Display for DisplayList<'a, T>
where
T: 'a + fmt::Display,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0.split_first() {
None => write!(f, "[]"),
Some((first, rest)) => {
write!(f, "[{}", first)?;
for x in rest {
write!(f, ", {}", x)?;
}
write!(f, "]")
}
}
}
}

View File

@@ -0,0 +1,69 @@
//! A Dead-Code Elimination (DCE) pass.
//!
//! Dead code here means instructions that have no side effects and whose
//! result values are not used by other instructions.
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::EntityRef;
use crate::ir::instructions::InstructionData;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};
use crate::timing;
/// Test whether the given opcode is unsafe to even consider for DCE.
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
|| opcode.is_return()
|| opcode.can_trap()
|| opcode.other_side_effects()
|| opcode.can_store()
}
/// Preserve instructions with used result values.
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
dfg.inst_results(inst).iter().any(|v| live[v.index()])
}
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't DCE them even if the
/// loaded value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
if !opcode.can_load() {
return false;
}
match *data {
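// Stack loads can't trap: stack slots are always accessible.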
InstructionData::StackLoad { .. } => false,
InstructionData::Load { flags, .. } => !flags.notrap(),
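// Be conservative about any other kind of load instruction.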
_ => true,
}
}
/// Perform DCE on `func`.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
let _tt = timing::dce();
debug_assert!(domtree.is_valid());
let mut live = vec![false; func.dfg.num_values()];
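// Visit EBBs in CFG post-order and each EBB's instructions bottom-up, so every use of a
// value is processed before the instruction that defines it.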
for &ebb in domtree.cfg_postorder() {
let mut pos = FuncCursor::new(func).at_bottom(ebb);
while let Some(inst) = pos.prev_inst() {
{
let data = &pos.func.dfg[inst];
let opcode = data.opcode();
if trivially_unsafe_for_dce(opcode)
|| is_load_with_defined_trapping(opcode, &data)
|| any_inst_results_used(inst, &live, &pos.func.dfg)
{
for arg in pos.func.dfg.inst_args(inst) {
let v = pos.func.dfg.resolve_aliases(*arg);
live[v.index()] = true;
}
continue;
}
}
pos.remove_inst();
}
}
}

View File

@@ -0,0 +1,590 @@
//! Compute "magic numbers" for division-by-constants transformations.
//!
//! Math helpers for division by (non-power-of-2) constants. This is based
//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
//! makes little difference, but the signed-vs-unsigned aspect has a large
//! effect. Therefore everything is presented in the order U32 U64 S32 S64
//! so as to emphasise the similarity of the U32 and U64 cases and the S32
//! and S64 cases.
// Structures to hold the "magic numbers" computed.
#[derive(PartialEq, Debug)]
pub struct MU32 {
pub mul_by: u32,
pub do_add: bool,
pub shift_by: i32,
}
#[derive(PartialEq, Debug)]
pub struct MU64 {
pub mul_by: u64,
pub do_add: bool,
pub shift_by: i32,
}
#[derive(PartialEq, Debug)]
pub struct MS32 {
pub mul_by: i32,
pub shift_by: i32,
}
#[derive(PartialEq, Debug)]
pub struct MS64 {
pub mul_by: i64,
pub shift_by: i32,
}
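// In all four result structs, `mul_by` is the magic multiplier and `shift_by` is the final
// right-shift amount; in the unsigned cases, `do_add` requests the extra add fixup step
// described in "Hacker's Delight".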
// The actual "magic number" generators follow.
pub fn magic_u32(d: u32) -> MU32 {
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
let mut do_add: bool = false;
let mut p: i32 = 31;
let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
let mut q1: u32 = 0x80000000u32 / nc;
let mut r1: u32 = 0x80000000u32 - q1 * nc;
let mut q2: u32 = 0x7FFFFFFFu32 / d;
let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
loop {
p = p + 1;
if r1 >= nc - r1 {
q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
} else {
q1 = 2 * q1;
r1 = 2 * r1;
}
if r2 + 1 >= d - r2 {
if q2 >= 0x7FFFFFFFu32 {
do_add = true;
}
q2 = 2 * q2 + 1;
r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
} else {
if q2 >= 0x80000000u32 {
do_add = true;
}
q2 = u32::wrapping_mul(2, q2);
r2 = 2 * r2 + 1;
}
let delta: u32 = d - 1 - r2;
if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
break;
}
}
MU32 {
mul_by: q2 + 1,
do_add,
shift_by: p - 32,
}
}
pub fn magic_u64(d: u64) -> MU64 {
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
let mut do_add: bool = false;
let mut p: i32 = 63;
let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
let mut q1: u64 = 0x8000000000000000u64 / nc;
let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
loop {
p = p + 1;
if r1 >= nc - r1 {
q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
} else {
q1 = 2 * q1;
r1 = 2 * r1;
}
if r2 + 1 >= d - r2 {
if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
do_add = true;
}
q2 = 2 * q2 + 1;
r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
} else {
if q2 >= 0x8000000000000000u64 {
do_add = true;
}
q2 = u64::wrapping_mul(2, q2);
r2 = 2 * r2 + 1;
}
let delta: u64 = d - 1 - r2;
if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
break;
}
}
MU64 {
mul_by: q2 + 1,
do_add,
shift_by: p - 64,
}
}
pub fn magic_s32(d: i32) -> MS32 {
debug_assert_ne!(d, -1);
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1);
let two31: u32 = 0x80000000u32;
let mut p: i32 = 31;
let ad: u32 = i32::wrapping_abs(d) as u32;
let t: u32 = two31 + ((d as u32) >> 31);
let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
let mut q1: u32 = two31 / anc;
let mut r1: u32 = two31 - q1 * anc;
let mut q2: u32 = two31 / ad;
let mut r2: u32 = two31 - q2 * ad;
loop {
p = p + 1;
q1 = 2 * q1;
r1 = 2 * r1;
if r1 >= anc {
q1 = q1 + 1;
r1 = r1 - anc;
}
q2 = 2 * q2;
r2 = 2 * r2;
if r2 >= ad {
q2 = q2 + 1;
r2 = r2 - ad;
}
let delta: u32 = ad - r2;
if !(q1 < delta || (q1 == delta && r1 == 0)) {
break;
}
}
MS32 {
mul_by: (if d < 0 {
u32::wrapping_neg(q2 + 1)
} else {
q2 + 1
}) as i32,
shift_by: p - 32,
}
}
pub fn magic_s64(d: i64) -> MS64 {
debug_assert_ne!(d, -1);
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1);
let two63: u64 = 0x8000000000000000u64;
let mut p: i32 = 63;
let ad: u64 = i64::wrapping_abs(d) as u64;
let t: u64 = two63 + ((d as u64) >> 63);
let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
let mut q1: u64 = two63 / anc;
let mut r1: u64 = two63 - q1 * anc;
let mut q2: u64 = two63 / ad;
let mut r2: u64 = two63 - q2 * ad;
loop {
p = p + 1;
q1 = 2 * q1;
r1 = 2 * r1;
if r1 >= anc {
q1 = q1 + 1;
r1 = r1 - anc;
}
q2 = 2 * q2;
r2 = 2 * r2;
if r2 >= ad {
q2 = q2 + 1;
r2 = r2 - ad;
}
let delta: u64 = ad - r2;
if !(q1 < delta || (q1 == delta && r1 == 0)) {
break;
}
}
MS64 {
mul_by: (if d < 0 {
u64::wrapping_neg(q2 + 1)
} else {
q2 + 1
}) as i64,
shift_by: p - 64,
}
}
#[cfg(test)]
mod tests {
use super::{magic_s32, magic_s64, magic_u32, magic_u64};
use super::{MS32, MS64, MU32, MU64};
fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 {
MU32 {
mul_by,
do_add,
shift_by,
}
}
fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 {
MU64 {
mul_by,
do_add,
shift_by,
}
}
fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 {
MS32 { mul_by, shift_by }
}
fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 {
MS64 { mul_by, shift_by }
}
#[test]
fn test_magic_u32() {
assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0));
assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1));
assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0));
assert_eq!(magic_u32(5u32), make_mu32(0xcccccccdu32, false, 2));
assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2));
assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3));
assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1));
assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3));
assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3));
assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3));
assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3));
assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3));
assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9));
assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11));
assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15));
assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0));
assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16));
assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23));
assert_eq!(
magic_u32(0xdeadbeefu32),
make_mu32(0x93275ab3u32, false, 31)
);
assert_eq!(
magic_u32(0xfffffffdu32),
make_mu32(0x40000001u32, false, 30)
);
assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32));
assert_eq!(
magic_u32(0xffffffffu32),
make_mu32(0x80000001u32, false, 31)
);
}
#[test]
fn test_magic_u64() {
assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0));
assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1));
assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0));
assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2));
assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2));
assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3));
assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3));
assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3));
assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1));
assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3));
assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5));
assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7));
assert_eq!(
magic_u64(625u64),
make_mu64(0x346dc5d63886594bu64, false, 7)
);
assert_eq!(
magic_u64(1337u64),
make_mu64(0xc4119d952866a139u64, false, 10)
);
assert_eq!(
magic_u64(31415927u64),
make_mu64(0x116d154b9c3d2f85u64, true, 25)
);
assert_eq!(
magic_u64(0x00000000deadbeefu64),
make_mu64(0x93275ab2dfc9094bu64, false, 31)
);
assert_eq!(
magic_u64(0x00000000fffffffdu64),
make_mu64(0x8000000180000005u64, false, 31)
);
assert_eq!(
magic_u64(0x00000000fffffffeu64),
make_mu64(0x0000000200000005u64, true, 32)
);
assert_eq!(
magic_u64(0x00000000ffffffffu64),
make_mu64(0x8000000080000001u64, false, 31)
);
assert_eq!(
magic_u64(0x0000000100000000u64),
make_mu64(0x0000000100000000u64, false, 0)
);
assert_eq!(
magic_u64(0x0000000100000001u64),
make_mu64(0xffffffff00000001u64, false, 32)
);
assert_eq!(
magic_u64(0x0ddc0ffeebadf00du64),
make_mu64(0x2788e9d394b77da1u64, true, 60)
);
assert_eq!(
magic_u64(0xfffffffffffffffdu64),
make_mu64(0x4000000000000001u64, false, 62)
);
assert_eq!(
magic_u64(0xfffffffffffffffeu64),
make_mu64(0x0000000000000003u64, true, 64)
);
assert_eq!(
magic_u64(0xffffffffffffffffu64),
make_mu64(0x8000000000000001u64, false, 63)
);
}
#[test]
fn test_magic_s32() {
assert_eq!(
magic_s32(-0x80000000i32),
make_ms32(0x7fffffffu32 as i32, 30)
);
assert_eq!(
magic_s32(-0x7FFFFFFFi32),
make_ms32(0xbfffffffu32 as i32, 29)
);
assert_eq!(
magic_s32(-0x7FFFFFFEi32),
make_ms32(0x7ffffffdu32 as i32, 30)
);
assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23));
assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9));
assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7));
assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1));
assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1));
assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0));
assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0));
assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0));
assert_eq!(magic_s32(4i32), make_ms32(0x80000001u32 as i32, 1));
assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1));
assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0));
assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2));
assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1));
assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2));
assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1));
assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1));
assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3));
assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3));
assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8));
assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9));
assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23));
assert_eq!(
magic_s32(0x7ffffffei32),
make_ms32(0x80000003u32 as i32, 30)
);
assert_eq!(
magic_s32(0x7fffffffi32),
make_ms32(0x40000001u32 as i32, 29)
);
}
#[test]
fn test_magic_s64() {
assert_eq!(
magic_s64(-0x8000000000000000i64),
make_ms64(0x7fffffffffffffffu64 as i64, 62)
);
assert_eq!(
magic_s64(-0x7FFFFFFFFFFFFFFFi64),
make_ms64(0xbfffffffffffffffu64 as i64, 61)
);
assert_eq!(
magic_s64(-0x7FFFFFFFFFFFFFFEi64),
make_ms64(0x7ffffffffffffffdu64 as i64, 62)
);
assert_eq!(
magic_s64(-0x0ddC0ffeeBadF00di64),
make_ms64(0x6c3b8b1635a4412fu64 as i64, 59)
);
assert_eq!(
magic_s64(-0x100000001i64),
make_ms64(0x800000007fffffffu64 as i64, 31)
);
assert_eq!(
magic_s64(-0x100000000i64),
make_ms64(0x7fffffffffffffffu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xFFFFFFFFi64),
make_ms64(0x7fffffff7fffffffu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xFFFFFFFEi64),
make_ms64(0x7ffffffefffffffdu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xFFFFFFFDi64),
make_ms64(0x7ffffffe7ffffffbu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xDeadBeefi64),
make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31)
);
assert_eq!(
magic_s64(-31415927i64),
make_ms64(0x7749755a31e1683du64 as i64, 24)
);
assert_eq!(
magic_s64(-1337i64),
make_ms64(0x9df731356bccaf63u64 as i64, 9)
);
assert_eq!(
magic_s64(-256i64),
make_ms64(0x7fffffffffffffffu64 as i64, 7)
);
assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1));
assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1));
assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0));
assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0));
assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as i64, 0));
assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1));
assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1));
assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1));
assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0));
assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2));
assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4));
assert_eq!(
magic_s64(125i64),
make_ms64(0x20c49ba5e353f7cfu64 as i64, 4)
);
assert_eq!(
magic_s64(625i64),
make_ms64(0x346dc5d63886594bu64 as i64, 7)
);
assert_eq!(
magic_s64(1337i64),
make_ms64(0x6208ceca9433509du64 as i64, 9)
);
assert_eq!(
magic_s64(31415927i64),
make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24)
);
assert_eq!(
magic_s64(0x00000000deadbeefi64),
make_ms64(0x93275ab2dfc9094bu64 as i64, 31)
);
assert_eq!(
magic_s64(0x00000000fffffffdi64),
make_ms64(0x8000000180000005u64 as i64, 31)
);
assert_eq!(
magic_s64(0x00000000fffffffei64),
make_ms64(0x8000000100000003u64 as i64, 31)
);
assert_eq!(
magic_s64(0x00000000ffffffffi64),
make_ms64(0x8000000080000001u64 as i64, 31)
);
assert_eq!(
magic_s64(0x0000000100000000i64),
make_ms64(0x8000000000000001u64 as i64, 31)
);
assert_eq!(
magic_s64(0x0000000100000001i64),
make_ms64(0x7fffffff80000001u64 as i64, 31)
);
assert_eq!(
magic_s64(0x0ddc0ffeebadf00di64),
make_ms64(0x93c474e9ca5bbed1u64 as i64, 59)
);
assert_eq!(
magic_s64(0x7ffffffffffffffdi64),
make_ms64(0x2000000000000001u64 as i64, 60)
);
assert_eq!(
magic_s64(0x7ffffffffffffffei64),
make_ms64(0x8000000000000003u64 as i64, 62)
);
assert_eq!(
magic_s64(0x7fffffffffffffffi64),
make_ms64(0x4000000000000001u64 as i64, 61)
);
}
#[test]
fn test_magic_generators_dont_panic() {
// The point of this is to check that the magic number generators
// don't panic with integer wraparounds, especially at boundary
// cases for their arguments. The actual results are thrown away.
let mut total: u64 = 0;
// Testing UP magic_u32
for x in 2..(200 * 1000u32) {
let m = magic_u32(x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 1747815691);
// Testing DOWN magic_u32
for x in 0..(200 * 1000u32) {
let m = magic_u32(0xFFFF_FFFFu32 - x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 2210292772);
// Testing UP magic_u64
for x in 2..(200 * 1000u64) {
let m = magic_u64(x);
total = total ^ m.mul_by;
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 7430004084791260605);
// Testing DOWN magic_u64
for x in 0..(200 * 1000u64) {
let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
total = total ^ m.mul_by;
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 7547519887519825919);
// Testing UP magic_s32
for x in 0..(200 * 1000i32) {
let m = magic_s32(-0x8000_0000i32 + x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
assert_eq!(total, 10899224186731671235);
// Testing DOWN magic_s32
for x in 0..(200 * 1000i32) {
let m = magic_s32(0x7FFF_FFFFi32 - x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
assert_eq!(total, 7547519887517897369);
// Testing UP magic_s64
for x in 0..(200 * 1000i64) {
let m = magic_s64(-0x8000_0000_0000_0000i64 + x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
assert_eq!(total, 8029756891368555163);
// Testing DOWN magic_s64
for x in 0..(200 * 1000i64) {
let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
// Force `total` -- and hence, the entire computation -- to
// be used, so that rustc can't optimise it out.
assert_eq!(total, 7547519887532559585u64);
}
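// Spot-check, as a sketch, that the unsigned magic numbers reproduce plain
// division. The evaluation scheme (high multiply, optional add fixup, then
// shift) is our reading of "Hacker's Delight", not code taken from the
// generators above.
#[test]
fn test_magic_u32_matches_division() {
    fn div_via_magic(n: u32, d: u32) -> u32 {
        let m = magic_u32(d);
        // q is the high 32 bits of n * mul_by.
        let q = ((u64::from(n) * u64::from(m.mul_by)) >> 32) as u32;
        if m.do_add {
            // Add fixup: fold the difference back in, then shift one bit less.
            (((n - q) >> 1) + q) >> (m.shift_by - 1)
        } else {
            q >> m.shift_by
        }
    }
    for &d in &[3u32, 7, 10, 125, 1337, 0xdead_beef] {
        for &n in &[0u32, 1, 2, 100, 0x7fff_ffff, 0xffff_ffff] {
            assert_eq!(div_via_magic(n, d), n / d);
        }
    }
}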
}

View File

@@ -0,0 +1,943 @@
//! A Dominator Tree represented as mappings of Ebbs to their immediate dominator.
use crate::entity::SecondaryMap;
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::instructions::BranchInfo;
use crate::ir::{Ebb, ExpandedProgramPoint, Function, Inst, Layout, ProgramOrder, Value};
use crate::packed_option::PackedOption;
use crate::timing;
use core::cmp;
use core::cmp::Ordering;
use core::mem;
use std::vec::Vec;
/// RPO numbers are initially assigned in multiples of STRIDE rather than contiguously, to
/// leave room for later localized modifications of the dominator tree.
const STRIDE: u32 = 4;
/// Special RPO numbers used during `compute_postorder`.
const DONE: u32 = 1;
const SEEN: u32 = 2;
/// Dominator tree node. We keep one of these per EBB.
#[derive(Clone, Default)]
struct DomNode {
/// Number of this node in a reverse post-order traversal of the CFG, starting from 1.
/// This number is monotonic in the reverse postorder but not contiguous, since we leave
/// holes for later localized modifications of the dominator tree.
/// Unreachable nodes get number 0, all others are positive.
rpo_number: u32,
/// The immediate dominator of this EBB, represented as the branch or jump instruction at the
/// end of the dominating basic block.
///
/// This is `None` for unreachable blocks and the entry block which doesn't have an immediate
/// dominator.
idom: PackedOption<Inst>,
}
/// The dominator tree for a single function.
pub struct DominatorTree {
nodes: SecondaryMap<Ebb, DomNode>,
/// CFG post-order of all reachable EBBs.
postorder: Vec<Ebb>,
/// Scratch memory used by `compute_postorder()`.
stack: Vec<Ebb>,
valid: bool,
}
/// Methods for querying the dominator tree.
impl DominatorTree {
/// Is `ebb` reachable from the entry block?
pub fn is_reachable(&self, ebb: Ebb) -> bool {
self.nodes[ebb].rpo_number != 0
}
/// Get the CFG post-order of EBBs that was used to compute the dominator tree.
///
/// Note that this post-order is not updated automatically when the CFG is modified. It is
/// computed from scratch and cached by `compute()`.
pub fn cfg_postorder(&self) -> &[Ebb] {
debug_assert!(self.is_valid());
&self.postorder
}
/// Returns the immediate dominator of `ebb`.
///
/// The immediate dominator of an extended basic block is a basic block which we represent by
/// the branch or jump instruction at the end of the basic block. This does not have to be the
/// terminator of its EBB.
///
/// A branch or jump is said to *dominate* `ebb` if all control flow paths from the function
/// entry to `ebb` must go through the branch.
///
/// The *immediate dominator* is the dominator that is closest to `ebb`. All other dominators
/// also dominate the immediate dominator.
///
/// This returns `None` if `ebb` is not reachable from the entry EBB, or if it is the entry EBB
/// which has no dominators.
pub fn idom(&self, ebb: Ebb) -> Option<Inst> {
self.nodes[ebb].idom.into()
}
/// Compare two EBBs relative to the reverse post-order.
fn rpo_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
self.nodes[a].rpo_number.cmp(&self.nodes[b].rpo_number)
}
/// Compare two program points relative to a reverse post-order traversal of the control-flow
/// graph.
///
/// Return `Ordering::Less` if `a` comes before `b` in the RPO.
///
/// If `a` and `b` belong to the same EBB, compare their relative position in the EBB.
pub fn rpo_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
let a = a.into();
let b = b.into();
self.rpo_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b))
.then(layout.cmp(a, b))
}
/// Returns `true` if `a` dominates `b`.
///
/// This means that every control-flow path from the function entry to `b` must go through `a`.
///
/// Dominance is ill defined for unreachable blocks. This function can always determine
/// dominance for instructions in the same EBB, but otherwise returns `false` if either block
/// is unreachable.
///
/// An instruction is considered to dominate itself.
pub fn dominates<A, B>(&self, a: A, b: B, layout: &Layout) -> bool
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
let a = a.into();
let b = b.into();
match a {
ExpandedProgramPoint::Ebb(ebb_a) => {
a == b || self.last_dominator(ebb_a, b, layout).is_some()
}
ExpandedProgramPoint::Inst(inst_a) => {
let ebb_a = layout.inst_ebb(inst_a).expect("Instruction not in layout.");
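// `inst_a` dominates `b` iff it is at or before the last instruction in
// `ebb_a` that dominates `b`.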
match self.last_dominator(ebb_a, b, layout) {
Some(last) => layout.cmp(inst_a, last) != Ordering::Greater,
None => false,
}
}
}
}
/// Find the last instruction in `a` that dominates `b`.
/// If no instructions in `a` dominate `b`, return `None`.
pub fn last_dominator<B>(&self, a: Ebb, b: B, layout: &Layout) -> Option<Inst>
where
B: Into<ExpandedProgramPoint>,
{
let (mut ebb_b, mut inst_b) = match b.into() {
ExpandedProgramPoint::Ebb(ebb) => (ebb, None),
ExpandedProgramPoint::Inst(inst) => (
layout.inst_ebb(inst).expect("Instruction not in layout."),
Some(inst),
),
};
let rpo_a = self.nodes[a].rpo_number;
// Run a finger up the dominator tree from b until we see a.
// Do nothing if b is unreachable.
while rpo_a < self.nodes[ebb_b].rpo_number {
let idom = match self.idom(ebb_b) {
Some(idom) => idom,
None => return None, // a is unreachable, so we climbed past the entry
};
ebb_b = layout.inst_ebb(idom).expect("Dominator got removed.");
inst_b = Some(idom);
}
if a == ebb_b {
inst_b
} else {
None
}
}
/// Compute the common dominator of two basic blocks.
///
/// Both basic blocks are assumed to be reachable.
pub fn common_dominator(
&self,
mut a: BasicBlock,
mut b: BasicBlock,
layout: &Layout,
) -> BasicBlock {
loop {
match self.rpo_cmp_ebb(a.ebb, b.ebb) {
Ordering::Less => {
// `a` comes before `b` in the RPO. Move `b` up.
let idom = self.nodes[b.ebb].idom.expect("Unreachable basic block?");
b = BasicBlock::new(
layout.inst_ebb(idom).expect("Dangling idom instruction"),
idom,
);
}
Ordering::Greater => {
// `b` comes before `a` in the RPO. Move `a` up.
let idom = self.nodes[a.ebb].idom.expect("Unreachable basic block?");
a = BasicBlock::new(
layout.inst_ebb(idom).expect("Dangling idom instruction"),
idom,
);
}
Ordering::Equal => break,
}
}
debug_assert_eq!(
a.ebb, b.ebb,
"Unreachable block passed to common_dominator?"
);
// We're in the same EBB. The common dominator is the earlier instruction.
if layout.cmp(a.inst, b.inst) == Ordering::Less {
a
} else {
b
}
}
}
impl DominatorTree {
/// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a
/// function.
pub fn new() -> Self {
Self {
nodes: SecondaryMap::new(),
postorder: Vec::new(),
stack: Vec::new(),
valid: false,
}
}
/// Allocate and compute a dominator tree.
pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self {
let mut domtree = Self::new();
domtree.compute(func, cfg);
domtree
}
/// Reset and compute a CFG post-order and dominator tree.
pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
let _tt = timing::domtree();
debug_assert!(cfg.is_valid());
self.compute_postorder(func);
self.compute_domtree(func, cfg);
self.valid = true;
}
/// Clear the data structures used to represent the dominator tree. This will leave the tree in
/// a state where `is_valid()` returns false.
pub fn clear(&mut self) {
self.nodes.clear();
self.postorder.clear();
debug_assert!(self.stack.is_empty());
self.valid = false;
}
/// Check if the dominator tree is in a valid state.
///
/// Note that this doesn't perform any kind of validity checks. It simply checks if the
/// `compute()` method has been called since the last `clear()`. It does not check that the
/// dominator tree is consistent with the CFG.
pub fn is_valid(&self) -> bool {
self.valid
}
/// Reset all internal data structures and compute a post-order of the control flow graph.
///
/// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
fn compute_postorder(&mut self, func: &Function) {
self.clear();
self.nodes.resize(func.dfg.num_ebbs());
// This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
// post-order of the EBBs that are reachable from the entry block. A DFT post-order is not
// unique. The specific order we get is controlled by two factors:
//
// 1. The order each node's children are visited, and
// 2. The method used for pruning graph edges to get a tree.
//
// There are two ways of viewing the CFG as a graph:
//
// 1. Each EBB is a node, with outgoing edges for all the branches in the EBB.
// 2. Each basic block is a node, with outgoing edges for the single branch at the end of
// the BB. (An EBB is a linear sequence of basic blocks).
//
// The first graph is a contraction of the second one. We want to compute an EBB post-order
// that is compatible with both graph interpretations. That is, if you compute a BB post-order
// and then remove those BBs that do not correspond to EBB headers, you get a post-order of
// the EBB graph.
//
// Node child order:
//
// In the BB graph, we always go down the fall-through path first and follow the branch
// destination second.
//
// In the EBB graph, this is equivalent to visiting EBB successors in a bottom-up
// order, starting from the destination of the EBB's terminating jump, ending at the
// destination of the first branch in the EBB.
//
// Edge pruning:
//
// In the BB graph, we keep an edge to an EBB the first time we visit the *source* side
// of the edge. Any subsequent edges to the same EBB are pruned.
//
// The equivalent tree is reached in the EBB graph by keeping the first edge to an EBB
// in a top-down traversal of the successors. (And then visiting edges in a bottom-up
// order).
//
// This pruning method makes it possible to compute the DFT without storing lots of
// information about the progress through an EBB.
// During this algorithm only, use `rpo_number` to hold the following state:
//
// 0: EBB has not yet been reached in the pre-order.
// SEEN: EBB has been pushed on the stack but successors not yet pushed.
// DONE: Successors pushed.
match func.layout.entry_block() {
Some(ebb) => {
self.stack.push(ebb);
self.nodes[ebb].rpo_number = SEEN;
}
None => return,
}
while let Some(ebb) = self.stack.pop() {
match self.nodes[ebb].rpo_number {
SEEN => {
// This is the first time we pop the EBB, so we need to scan its successors and
// then revisit it.
self.nodes[ebb].rpo_number = DONE;
self.stack.push(ebb);
self.push_successors(func, ebb);
}
DONE => {
// This is the second time we pop the EBB, so all successors have been
// processed.
self.postorder.push(ebb);
}
_ => unreachable!(),
}
}
}
/// Push `ebb` successors onto `self.stack`, filtering out those that have already been seen.
///
/// The successors are pushed in program order which is important to get a split-invariant
/// post-order. Split-invariant means that if an EBB is split in two, we get the same
/// post-order except for the insertion of the new EBB header at the split point.
fn push_successors(&mut self, func: &Function, ebb: Ebb) {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg.analyze_branch(inst) {
BranchInfo::SingleDest(succ, _) => self.push_if_unseen(succ),
BranchInfo::Table(jt, dest) => {
for succ in func.jump_tables[jt].iter() {
self.push_if_unseen(*succ);
}
if let Some(dest) = dest {
self.push_if_unseen(dest);
}
}
BranchInfo::NotABranch => {}
}
}
}
/// Push `ebb` onto `self.stack` if it has not already been seen.
fn push_if_unseen(&mut self, ebb: Ebb) {
if self.nodes[ebb].rpo_number == 0 {
self.nodes[ebb].rpo_number = SEEN;
self.stack.push(ebb);
}
}
/// Build a dominator tree from a control flow graph using Keith D. Cooper's
/// "Simple, Fast Dominator Algorithm."
fn compute_domtree(&mut self, func: &Function, cfg: &ControlFlowGraph) {
// During this algorithm, `rpo_number` has the following values:
//
// 0: EBB is not reachable.
// 1: EBB is reachable, but has not yet been visited during the first pass. This is set by
// `compute_postorder`.
// 2+: EBB is reachable and has an assigned RPO number.
// We'll be iterating over a reverse post-order of the CFG, skipping the entry block.
let (entry_block, postorder) = match self.postorder.as_slice().split_last() {
Some((&eb, rest)) => (eb, rest),
None => return,
};
debug_assert_eq!(Some(entry_block), func.layout.entry_block());
// Do a first pass where we assign RPO numbers to all reachable nodes.
self.nodes[entry_block].rpo_number = 2 * STRIDE;
for (rpo_idx, &ebb) in postorder.iter().rev().enumerate() {
// Update the current node and give it an RPO number.
// The entry block was given `2 * STRIDE`; the rest start at `3 * STRIDE`, in multiples
// of STRIDE, to leave room for future dominator tree modifications.
//
// Since `compute_idom` will only look at nodes with an assigned RPO number, the
// function will never see an uninitialized predecessor.
//
// Due to the nature of the post-order traversal, every node we visit will have at
// least one predecessor that has previously been visited during this RPO.
self.nodes[ebb] = DomNode {
idom: self.compute_idom(ebb, cfg, &func.layout).into(),
rpo_number: (rpo_idx as u32 + 3) * STRIDE,
}
}
// Now that we have RPO numbers for everything and initial immediate dominator estimates,
// iterate until convergence.
//
// If the function is free of irreducible control flow, this will exit after one iteration.
let mut changed = true;
while changed {
changed = false;
for &ebb in postorder.iter().rev() {
let idom = self.compute_idom(ebb, cfg, &func.layout).into();
if self.nodes[ebb].idom != idom {
self.nodes[ebb].idom = idom;
changed = true;
}
}
}
}
// Compute the immediate dominator for `ebb` using the current `idom` states for the reachable
// nodes.
fn compute_idom(&self, ebb: Ebb, cfg: &ControlFlowGraph, layout: &Layout) -> Inst {
// Get an iterator with just the reachable, already visited predecessors to `ebb`.
// Note that during the first pass, `rpo_number` is 1 for reachable blocks that haven't
// been visited yet, 0 for unreachable blocks.
let mut reachable_preds = cfg
.pred_iter(ebb)
.filter(|&BasicBlock { ebb: pred, .. }| self.nodes[pred].rpo_number > 1);
// The RPO must visit at least one predecessor before this node.
let mut idom = reachable_preds
.next()
.expect("EBB node must have one reachable predecessor");
for pred in reachable_preds {
idom = self.common_dominator(idom, pred, layout);
}
idom.inst
}
}
impl DominatorTree {
/// When splitting an `Ebb` using `Layout::split_ebb`, you can use this method to update
/// the dominator tree locally rather than recomputing it.
///
/// `old_ebb` is the `Ebb` before splitting, and `new_ebb` is the `Ebb` which now contains
/// the second half of `old_ebb`. `split_jump_inst` is the terminator jump instruction of
/// `old_ebb` that points to `new_ebb`.
pub fn recompute_split_ebb(&mut self, old_ebb: Ebb, new_ebb: Ebb, split_jump_inst: Inst) {
if !self.is_reachable(old_ebb) {
// old_ebb is unreachable, it stays so and new_ebb is unreachable too
self.nodes[new_ebb] = Default::default();
return;
}
// The postorder list is ordered by decreasing RPO number, so we invert the operands of
// the comparison for the binary search.
let old_ebb_postorder_index = self
.postorder
.as_slice()
.binary_search_by(|probe| self.rpo_cmp_ebb(old_ebb, *probe))
.expect("the old ebb is not declared to the dominator tree");
let new_ebb_rpo = self.insert_after_rpo(old_ebb, old_ebb_postorder_index, new_ebb);
self.nodes[new_ebb] = DomNode {
rpo_number: new_ebb_rpo,
idom: Some(split_jump_inst).into(),
};
}
// Insert `new_ebb` just after `ebb` in the RPO. If there is a gap in the RPO numbers,
// this function returns a number from the gap; otherwise it renumbers the following EBBs
// to make room.
fn insert_after_rpo(&mut self, ebb: Ebb, ebb_postorder_index: usize, new_ebb: Ebb) -> u32 {
let ebb_rpo_number = self.nodes[ebb].rpo_number;
let inserted_rpo_number = ebb_rpo_number + 1;
// If there is no gap in the RPO numbers for the new number, we iterate forward in RPO
// numbers and backwards in the postorder list of EBBs, renumbering the EBBs until we
// find a gap.
for (&current_ebb, current_rpo) in self.postorder[0..ebb_postorder_index]
.iter()
.rev()
.zip(inserted_rpo_number + 1..)
{
if self.nodes[current_ebb].rpo_number < current_rpo {
// There is no gap, we renumber
self.nodes[current_ebb].rpo_number = current_rpo;
} else {
// There is a gap, we stop the renumbering and exit
break;
}
}
// TODO: insert in constant time?
self.postorder.insert(ebb_postorder_index, new_ebb);
inserted_rpo_number
}
}
/// Optional pre-order information that can be computed for a dominator tree.
///
/// This data structure is computed from a `DominatorTree` and provides:
///
/// - A forward traversable dominator tree through the `children()` iterator.
/// - An ordering of EBBs according to a dominator tree pre-order.
/// - Constant time dominance checks at the EBB granularity.
///
/// The information in this auxiliary data structure is not easy to update when the control flow
/// graph changes, which is why it is kept separate.
pub struct DominatorTreePreorder {
nodes: SecondaryMap<Ebb, ExtraNode>,
// Scratch memory used by `compute_postorder()`.
stack: Vec<Ebb>,
}
#[derive(Default, Clone)]
struct ExtraNode {
/// First child node in the domtree.
child: PackedOption<Ebb>,
/// Next sibling node in the domtree. This linked list is ordered according to the CFG RPO.
sibling: PackedOption<Ebb>,
/// Sequence number for this node in a pre-order traversal of the dominator tree.
/// Unreachable blocks have number 0, the entry block is 1.
pre_number: u32,
/// Maximum `pre_number` for the sub-tree of the dominator tree that is rooted at this node.
/// This is always >= `pre_number`.
pre_max: u32,
}
/// Creating and computing the dominator tree pre-order.
impl DominatorTreePreorder {
/// Create a new blank `DominatorTreePreorder`.
pub fn new() -> Self {
Self {
nodes: SecondaryMap::new(),
stack: Vec::new(),
}
}
/// Recompute this data structure to match `domtree`.
pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
self.nodes.clear();
debug_assert_eq!(self.stack.len(), 0);
// Step 1: Populate the child and sibling links.
//
// By following the CFG post-order and pushing to the front of the lists, we make sure that
// sibling lists are ordered according to the CFG reverse post-order.
for &ebb in domtree.cfg_postorder() {
if let Some(idom_inst) = domtree.idom(ebb) {
let idom = layout.pp_ebb(idom_inst);
let sib = mem::replace(&mut self.nodes[idom].child, ebb.into());
self.nodes[ebb].sibling = sib;
} else {
// The only EBB without an immediate dominator is the entry.
self.stack.push(ebb);
}
}
// Step 2. Assign pre-order numbers from a DFS of the dominator tree.
debug_assert!(self.stack.len() <= 1);
let mut n = 0;
while let Some(ebb) = self.stack.pop() {
n += 1;
let node = &mut self.nodes[ebb];
node.pre_number = n;
node.pre_max = n;
if let Some(n) = node.sibling.expand() {
self.stack.push(n);
}
if let Some(n) = node.child.expand() {
self.stack.push(n);
}
}
// Step 3. Propagate the `pre_max` numbers up the tree.
// The CFG post-order is topologically ordered w.r.t. dominance so a node comes after all
// its dominator tree children.
for &ebb in domtree.cfg_postorder() {
if let Some(idom_inst) = domtree.idom(ebb) {
let idom = layout.pp_ebb(idom_inst);
let pre_max = cmp::max(self.nodes[ebb].pre_max, self.nodes[idom].pre_max);
self.nodes[idom].pre_max = pre_max;
}
}
}
}
/// An iterator that enumerates the direct children of an EBB in the dominator tree.
pub struct ChildIter<'a> {
dtpo: &'a DominatorTreePreorder,
next: PackedOption<Ebb>,
}
impl<'a> Iterator for ChildIter<'a> {
type Item = Ebb;
fn next(&mut self) -> Option<Ebb> {
let n = self.next.expand();
if let Some(ebb) = n {
self.next = self.dtpo.nodes[ebb].sibling;
}
n
}
}
/// Query interface for the dominator tree pre-order.
impl DominatorTreePreorder {
/// Get an iterator over the direct children of `ebb` in the dominator tree.
///
/// These are the EBBs whose immediate dominator is an instruction in `ebb`, ordered according
/// to the CFG reverse post-order.
pub fn children(&self, ebb: Ebb) -> ChildIter {
ChildIter {
dtpo: self,
next: self.nodes[ebb].child,
}
}
/// Fast, constant time dominance check with EBB granularity.
///
/// This computes the same result as `domtree.dominates(a, b)`, but in guaranteed fast constant
/// time. This is less general than the `DominatorTree` method because it only works with EBB
/// program points.
///
/// An EBB is considered to dominate itself.
pub fn dominates(&self, a: Ebb, b: Ebb) -> bool {
let na = &self.nodes[a];
let nb = &self.nodes[b];
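// A node's dominator-tree descendants occupy the pre-order interval [pre_number, pre_max],
// and these intervals nest, so `a` dominates `b` exactly when `b`'s interval is contained
// in `a`'s.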
na.pre_number <= nb.pre_number && na.pre_max >= nb.pre_max
}
/// Compare two EBBs according to the dominator pre-order.
pub fn pre_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
self.nodes[a].pre_number.cmp(&self.nodes[b].pre_number)
}
/// Compare two program points according to the dominator tree pre-order.
///
/// This ordering of program points has the property that, given a program point `pp`, all
/// program points dominated by `pp` follow immediately and contiguously after `pp` in the order.
pub fn pre_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
let a = a.into();
let b = b.into();
self.pre_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b))
.then(layout.cmp(a, b))
}
/// Compare two value defs according to the dominator tree pre-order.
///
/// Two values defined at the same program point are compared according to their parameter or
/// result order.
///
/// This is a total ordering of the values in the function.
pub fn pre_cmp_def(&self, a: Value, b: Value, func: &Function) -> Ordering {
let da = func.dfg.value_def(a);
let db = func.dfg.value_def(b);
self.pre_cmp(da, db, &func.layout)
.then_with(|| da.num().cmp(&db.num()))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::types::*;
use crate::ir::{Function, InstBuilder, TrapCode};
use crate::settings;
use crate::verifier::{verify_context, VerifierErrors};
#[test]
fn empty() {
let func = Function::new();
let cfg = ControlFlowGraph::with_function(&func);
debug_assert!(cfg.is_valid());
let dtree = DominatorTree::with_function(&func, &cfg);
assert_eq!(0, dtree.nodes.keys().count());
assert_eq!(dtree.cfg_postorder(), &[]);
let mut dtpo = DominatorTreePreorder::new();
dtpo.compute(&dtree, &func.layout);
}
#[test]
fn unreachable_node() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let v0 = func.dfg.append_ebb_param(ebb0, I32);
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
cur.ins().brnz(v0, ebb2, &[]);
cur.ins().trap(TrapCode::User(0));
cur.insert_ebb(ebb1);
let v1 = cur.ins().iconst(I32, 1);
let v2 = cur.ins().iadd(v0, v1);
cur.ins().jump(ebb0, &[v2]);
cur.insert_ebb(ebb2);
cur.ins().return_(&[v0]);
let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg);
// Fall-through-first, prune-at-source DFT:
//
// ebb0 {
// brnz ebb2 {
// trap
// ebb2 {
// return
// } ebb2
// } ebb0
assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0]);
let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout));
assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout));
let mut dtpo = DominatorTreePreorder::new();
dtpo.compute(&dt, &cur.func.layout);
assert!(dtpo.dominates(ebb0, ebb0));
assert!(!dtpo.dominates(ebb0, ebb1));
assert!(dtpo.dominates(ebb0, ebb2));
assert!(!dtpo.dominates(ebb1, ebb0));
assert!(dtpo.dominates(ebb1, ebb1));
assert!(!dtpo.dominates(ebb1, ebb2));
assert!(!dtpo.dominates(ebb2, ebb0));
assert!(!dtpo.dominates(ebb2, ebb1));
assert!(dtpo.dominates(ebb2, ebb2));
}
#[test]
fn non_zero_entry_block() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let ebb3 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb3, I32);
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb3);
let jmp_ebb3_ebb1 = cur.ins().jump(ebb1, &[]);
cur.insert_ebb(ebb1);
let br_ebb1_ebb0 = cur.ins().brnz(cond, ebb0, &[]);
let jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
cur.ins().jump(ebb0, &[]);
cur.insert_ebb(ebb0);
let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg);
// Fall-through-first, prune-at-source DFT:
//
// ebb3 {
// ebb3:jump ebb1 {
// ebb1 {
// ebb1:brnz ebb0 {
// ebb1:jump ebb2 {
// ebb2 {
// ebb2:jump ebb0 (seen)
// } ebb2
// } ebb1:jump ebb2
// ebb0 {
// } ebb0
// } ebb1:brnz ebb0
// } ebb1
// } ebb3:jump ebb1
// } ebb3
assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);
assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3);
assert_eq!(dt.idom(ebb3), None);
assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1);
assert_eq!(dt.idom(ebb2).unwrap(), jmp_ebb1_ebb2);
assert_eq!(dt.idom(ebb0).unwrap(), br_ebb1_ebb0);
assert!(dt.dominates(br_ebb1_ebb0, br_ebb1_ebb0, &cur.func.layout));
assert!(!dt.dominates(br_ebb1_ebb0, jmp_ebb3_ebb1, &cur.func.layout));
assert!(dt.dominates(jmp_ebb3_ebb1, br_ebb1_ebb0, &cur.func.layout));
assert_eq!(dt.rpo_cmp(ebb3, ebb3, &cur.func.layout), Ordering::Equal);
assert_eq!(dt.rpo_cmp(ebb3, ebb1, &cur.func.layout), Ordering::Less);
assert_eq!(
dt.rpo_cmp(ebb3, jmp_ebb3_ebb1, &cur.func.layout),
Ordering::Less
);
assert_eq!(
dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout),
Ordering::Less
);
}
#[test]
fn backwards_layout() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
let jmp02 = cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb1);
let trap = cur.ins().trap(TrapCode::User(5));
cur.insert_ebb(ebb2);
let jmp21 = cur.ins().jump(ebb1, &[]);
let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg);
assert_eq!(cur.func.layout.entry_block(), Some(ebb0));
assert_eq!(dt.idom(ebb0), None);
assert_eq!(dt.idom(ebb1), Some(jmp21));
assert_eq!(dt.idom(ebb2), Some(jmp02));
assert!(dt.dominates(ebb0, ebb0, &cur.func.layout));
assert!(dt.dominates(ebb0, jmp02, &cur.func.layout));
assert!(dt.dominates(ebb0, ebb1, &cur.func.layout));
assert!(dt.dominates(ebb0, trap, &cur.func.layout));
assert!(dt.dominates(ebb0, ebb2, &cur.func.layout));
assert!(dt.dominates(ebb0, jmp21, &cur.func.layout));
assert!(!dt.dominates(jmp02, ebb0, &cur.func.layout));
assert!(dt.dominates(jmp02, jmp02, &cur.func.layout));
assert!(dt.dominates(jmp02, ebb1, &cur.func.layout));
assert!(dt.dominates(jmp02, trap, &cur.func.layout));
assert!(dt.dominates(jmp02, ebb2, &cur.func.layout));
assert!(dt.dominates(jmp02, jmp21, &cur.func.layout));
assert!(!dt.dominates(ebb1, ebb0, &cur.func.layout));
assert!(!dt.dominates(ebb1, jmp02, &cur.func.layout));
assert!(dt.dominates(ebb1, ebb1, &cur.func.layout));
assert!(dt.dominates(ebb1, trap, &cur.func.layout));
assert!(!dt.dominates(ebb1, ebb2, &cur.func.layout));
assert!(!dt.dominates(ebb1, jmp21, &cur.func.layout));
assert!(!dt.dominates(trap, ebb0, &cur.func.layout));
assert!(!dt.dominates(trap, jmp02, &cur.func.layout));
assert!(!dt.dominates(trap, ebb1, &cur.func.layout));
assert!(dt.dominates(trap, trap, &cur.func.layout));
assert!(!dt.dominates(trap, ebb2, &cur.func.layout));
assert!(!dt.dominates(trap, jmp21, &cur.func.layout));
assert!(!dt.dominates(ebb2, ebb0, &cur.func.layout));
assert!(!dt.dominates(ebb2, jmp02, &cur.func.layout));
assert!(dt.dominates(ebb2, ebb1, &cur.func.layout));
assert!(dt.dominates(ebb2, trap, &cur.func.layout));
assert!(dt.dominates(ebb2, ebb2, &cur.func.layout));
assert!(dt.dominates(ebb2, jmp21, &cur.func.layout));
assert!(!dt.dominates(jmp21, ebb0, &cur.func.layout));
assert!(!dt.dominates(jmp21, jmp02, &cur.func.layout));
assert!(dt.dominates(jmp21, ebb1, &cur.func.layout));
assert!(dt.dominates(jmp21, trap, &cur.func.layout));
assert!(!dt.dominates(jmp21, ebb2, &cur.func.layout));
assert!(dt.dominates(jmp21, jmp21, &cur.func.layout));
}
#[test]
fn renumbering() {
let mut func = Function::new();
let entry = func.dfg.make_ebb();
let ebb0 = func.dfg.make_ebb();
let ebb100 = func.dfg.make_ebb();
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(entry);
cur.ins().jump(ebb0, &[]);
cur.insert_ebb(ebb0);
let cond = cur.ins().iconst(I32, 0);
let inst2 = cur.ins().brz(cond, ebb0, &[]);
let inst3 = cur.ins().brz(cond, ebb0, &[]);
let inst4 = cur.ins().brz(cond, ebb0, &[]);
let inst5 = cur.ins().brz(cond, ebb0, &[]);
cur.ins().jump(ebb100, &[]);
cur.insert_ebb(ebb100);
cur.ins().return_(&[]);
let mut cfg = ControlFlowGraph::with_function(cur.func);
let mut dt = DominatorTree::with_function(cur.func, &cfg);
let ebb1 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb1, inst2);
cur.goto_bottom(ebb0);
let middle_jump_inst = cur.ins().jump(ebb1, &[]);
dt.recompute_split_ebb(ebb0, ebb1, middle_jump_inst);
let ebb2 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb2, inst3);
cur.goto_bottom(ebb1);
let middle_jump_inst = cur.ins().jump(ebb2, &[]);
dt.recompute_split_ebb(ebb1, ebb2, middle_jump_inst);
let ebb3 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb3, inst4);
cur.goto_bottom(ebb2);
let middle_jump_inst = cur.ins().jump(ebb3, &[]);
dt.recompute_split_ebb(ebb2, ebb3, middle_jump_inst);
let ebb4 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb4, inst5);
cur.goto_bottom(ebb3);
let middle_jump_inst = cur.ins().jump(ebb4, &[]);
dt.recompute_split_ebb(ebb3, ebb4, middle_jump_inst);
cfg.compute(cur.func);
let flags = settings::Flags::new(settings::builder());
let mut errors = VerifierErrors::default();
verify_context(cur.func, &cfg, &dt, &flags, &mut errors).unwrap();
assert!(errors.0.is_empty());
}
}

View File

@@ -0,0 +1,350 @@
//! A control flow graph represented as mappings of extended basic blocks to their predecessors
//! and successors.
//!
//! Successors are represented as extended basic blocks while predecessors are represented by basic
//! blocks. Basic blocks are denoted by tuples of EBB and branch/jump instructions. Each
//! predecessor tuple corresponds to the end of a basic block.
//!
//! ```c
//! Ebb0:
//! ... ; beginning of basic block
//!
//! ...
//!
//! brz vx, Ebb1 ; end of basic block
//!
//! ... ; beginning of basic block
//!
//! ...
//!
//! jmp Ebb2 ; end of basic block
//! ```
//!
//! Here `Ebb1` and `Ebb2` would each have a single predecessor denoted as `(Ebb0, brz)`
//! and `(Ebb0, jmp Ebb2)` respectively.
use crate::bforest;
use crate::entity::SecondaryMap;
use crate::ir::instructions::BranchInfo;
use crate::ir::{Ebb, Function, Inst};
use crate::timing;
use core::mem;
/// A basic block denoted by its enclosing Ebb and last instruction.
#[derive(PartialEq, Eq)]
pub struct BasicBlock {
/// Enclosing Ebb key.
pub ebb: Ebb,
/// Last instruction in the basic block.
pub inst: Inst,
}
impl BasicBlock {
    /// Convenience method for constructing a new `BasicBlock`.
pub fn new(ebb: Ebb, inst: Inst) -> Self {
Self { ebb, inst }
}
}
/// A container for the successors and predecessors of some Ebb.
#[derive(Clone, Default)]
struct CFGNode {
/// Instructions that can branch or jump to this EBB.
///
    /// This maps branch instruction -> predecessor EBB, which is redundant since the EBB containing
/// the branch instruction is available from the `layout.inst_ebb()` method. We store the
/// redundant information because:
///
/// 1. Many `pred_iter()` consumers want the EBB anyway, so it is handily available.
    /// 2. `invalidate_ebb_successors()` may be called *after* branches have been removed from
    ///    their EBB, but we still need to remove them from the old EBB's predecessor map.
///
/// The redundant EBB stored here is always consistent with the CFG successor lists, even after
/// the IR has been edited.
pub predecessors: bforest::Map<Inst, Ebb>,
/// Set of EBBs that are the targets of branches and jumps in this EBB.
/// The set is ordered by EBB number, indicated by the `()` comparator type.
pub successors: bforest::Set<Ebb>,
}
/// The Control Flow Graph maintains a mapping of ebbs to their predecessors
/// and successors where predecessors are basic blocks and successors are
/// extended basic blocks.
pub struct ControlFlowGraph {
data: SecondaryMap<Ebb, CFGNode>,
pred_forest: bforest::MapForest<Inst, Ebb>,
succ_forest: bforest::SetForest<Ebb>,
valid: bool,
}
impl ControlFlowGraph {
/// Allocate a new blank control flow graph.
pub fn new() -> Self {
Self {
data: SecondaryMap::new(),
valid: false,
pred_forest: bforest::MapForest::new(),
succ_forest: bforest::SetForest::new(),
}
}
/// Clear all data structures in this control flow graph.
pub fn clear(&mut self) {
self.data.clear();
self.pred_forest.clear();
self.succ_forest.clear();
self.valid = false;
}
/// Allocate and compute the control flow graph for `func`.
pub fn with_function(func: &Function) -> Self {
let mut cfg = Self::new();
cfg.compute(func);
cfg
}
/// Compute the control flow graph of `func`.
///
/// This will clear and overwrite any information already stored in this data structure.
pub fn compute(&mut self, func: &Function) {
let _tt = timing::flowgraph();
self.clear();
self.data.resize(func.dfg.num_ebbs());
for ebb in &func.layout {
self.compute_ebb(func, ebb);
}
self.valid = true;
}
fn compute_ebb(&mut self, func: &Function, ebb: Ebb) {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg.analyze_branch(inst) {
BranchInfo::SingleDest(dest, _) => {
self.add_edge(ebb, inst, dest);
}
BranchInfo::Table(jt, dest) => {
if let Some(dest) = dest {
self.add_edge(ebb, inst, dest);
}
for dest in func.jump_tables[jt].iter() {
self.add_edge(ebb, inst, *dest);
}
}
BranchInfo::NotABranch => {}
}
}
}
fn invalidate_ebb_successors(&mut self, ebb: Ebb) {
// Temporarily take ownership because we need mutable access to self.data inside the loop.
// Unfortunately borrowck cannot see that our mut accesses to predecessors don't alias
// our iteration over successors.
let mut successors = mem::replace(&mut self.data[ebb].successors, Default::default());
for succ in successors.iter(&self.succ_forest) {
self.data[succ]
.predecessors
.retain(&mut self.pred_forest, |_, &mut e| e != ebb);
}
successors.clear(&mut self.succ_forest);
}
/// Recompute the control flow graph of `ebb`.
///
/// This is for use after modifying instructions within a specific EBB. It recomputes all edges
    /// from `ebb` while leaving edges to `ebb` intact. Its functionality is a subset of that of
    /// the more expensive `compute`, and it should be used when we know we don't need to recompute
    /// the CFG from scratch, but rather that our changes have been restricted to specific EBBs.
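    ///
    /// A usage sketch (hypothetical `branch` and `new_dest` values; compare the
    /// `branches_and_jumps` test below):
    ///
    /// ```ignore
    /// func.dfg.replace(branch).jump(new_dest, &[]);
    /// cfg.recompute_ebb(&func, ebb);
    /// ```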
pub fn recompute_ebb(&mut self, func: &Function, ebb: Ebb) {
debug_assert!(self.is_valid());
self.invalidate_ebb_successors(ebb);
self.compute_ebb(func, ebb);
}
fn add_edge(&mut self, from: Ebb, from_inst: Inst, to: Ebb) {
self.data[from]
.successors
.insert(to, &mut self.succ_forest, &());
self.data[to]
.predecessors
.insert(from_inst, from, &mut self.pred_forest, &());
}
/// Get an iterator over the CFG predecessors to `ebb`.
pub fn pred_iter(&self, ebb: Ebb) -> PredIter {
PredIter(self.data[ebb].predecessors.iter(&self.pred_forest))
}
/// Get an iterator over the CFG successors to `ebb`.
pub fn succ_iter(&self, ebb: Ebb) -> SuccIter {
debug_assert!(self.is_valid());
self.data[ebb].successors.iter(&self.succ_forest)
}
/// Check if the CFG is in a valid state.
///
/// Note that this doesn't perform any kind of validity checks. It simply checks if the
/// `compute()` method has been called since the last `clear()`. It does not check that the
/// CFG is consistent with the function.
pub fn is_valid(&self) -> bool {
self.valid
}
}
/// An iterator over EBB predecessors. The item type is `BasicBlock`.
///
/// Each predecessor is an instruction that branches to the EBB.
pub struct PredIter<'a>(bforest::MapIter<'a, Inst, Ebb>);
impl<'a> Iterator for PredIter<'a> {
type Item = BasicBlock;
fn next(&mut self) -> Option<BasicBlock> {
self.0.next().map(|(i, e)| BasicBlock::new(e, i))
}
}
/// An iterator over EBB successors. The item type is `Ebb`.
pub type SuccIter<'a> = bforest::SetIter<'a, Ebb>;
#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{types, Function, InstBuilder};
use std::vec::Vec;
#[test]
fn empty() {
let func = Function::new();
ControlFlowGraph::with_function(&func);
}
#[test]
fn no_predecessors() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
func.layout.append_ebb(ebb0);
func.layout.append_ebb(ebb1);
func.layout.append_ebb(ebb2);
let cfg = ControlFlowGraph::with_function(&func);
let mut fun_ebbs = func.layout.ebbs();
for ebb in func.layout.ebbs() {
assert_eq!(ebb, fun_ebbs.next().unwrap());
assert_eq!(cfg.pred_iter(ebb).count(), 0);
assert_eq!(cfg.succ_iter(ebb).count(), 0);
}
}
#[test]
fn branches_and_jumps() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb0, types::I32);
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let br_ebb0_ebb2;
let br_ebb1_ebb1;
let jmp_ebb0_ebb1;
let jmp_ebb1_ebb2;
{
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
br_ebb0_ebb2 = cur.ins().brnz(cond, ebb2, &[]);
jmp_ebb0_ebb1 = cur.ins().jump(ebb1, &[]);
cur.insert_ebb(ebb1);
br_ebb1_ebb1 = cur.ins().brnz(cond, ebb1, &[]);
jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
}
let mut cfg = ControlFlowGraph::with_function(&func);
{
let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();
let ebb0_successors = cfg.succ_iter(ebb0).collect::<Vec<_>>();
let ebb1_successors = cfg.succ_iter(ebb1).collect::<Vec<_>>();
let ebb2_successors = cfg.succ_iter(ebb2).collect::<Vec<_>>();
assert_eq!(ebb0_predecessors.len(), 0);
assert_eq!(ebb1_predecessors.len(), 2);
assert_eq!(ebb2_predecessors.len(), 2);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb0, jmp_ebb0_ebb1)),
true
);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb1, br_ebb1_ebb1)),
true
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb0, br_ebb0_ebb2)),
true
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb1, jmp_ebb1_ebb2)),
true
);
assert_eq!(ebb0_successors, [ebb1, ebb2]);
assert_eq!(ebb1_successors, [ebb1, ebb2]);
assert_eq!(ebb2_successors, []);
}
// Change some instructions and recompute ebb0
func.dfg.replace(br_ebb0_ebb2).brnz(cond, ebb1, &[]);
func.dfg.replace(jmp_ebb0_ebb1).return_(&[]);
cfg.recompute_ebb(&mut func, ebb0);
let br_ebb0_ebb1 = br_ebb0_ebb2;
{
let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();
let ebb0_successors = cfg.succ_iter(ebb0);
let ebb1_successors = cfg.succ_iter(ebb1);
let ebb2_successors = cfg.succ_iter(ebb2);
assert_eq!(ebb0_predecessors.len(), 0);
assert_eq!(ebb1_predecessors.len(), 2);
assert_eq!(ebb2_predecessors.len(), 1);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb0, br_ebb0_ebb1)),
true
);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb1, br_ebb1_ebb1)),
true
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb0, br_ebb0_ebb2)),
false
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb1, jmp_ebb1_ebb2)),
true
);
assert_eq!(ebb0_successors.collect::<Vec<_>>(), [ebb1]);
assert_eq!(ebb1_successors.collect::<Vec<_>>(), [ebb1, ebb2]);
assert_eq!(ebb2_successors.collect::<Vec<_>>(), []);
}
}
}

cranelift/codegen/src/fx.rs

@@ -0,0 +1,111 @@
// This file is taken from the Rust compiler: src/librustc_data_structures/fx.rs
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use super::{HashMap, HashSet};
use core::default::Default;
use core::hash::{BuildHasherDefault, Hash, Hasher};
use core::ops::BitXor;
pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;
#[allow(non_snake_case)]
pub fn FxHashMap<K: Hash + Eq, V>() -> FxHashMap<K, V> {
HashMap::default()
}
#[allow(non_snake_case)]
pub fn FxHashSet<V: Hash + Eq>() -> FxHashSet<V> {
HashSet::default()
}
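// Example (a sketch): the aliases above swap the standard library's default SipHash for the
// faster, non-DoS-resistant FxHasher:
//
//     let mut m: FxHashMap<u32, &str> = FxHashMap();
//     m.insert(1, "one");
//     assert_eq!(m.get(&1), Some(&"one"));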
/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DOS attempts, so we use a fast
/// non-cryptographic hash.
///
/// This is the same as the algorithm used by Firefox -- which is a homespun
/// one not based on any widely-known algorithm -- though modified to produce
/// 64-bit hash values instead of 32-bit hash values. It consistently
/// out-performs an FNV-based hash within rustc itself -- the collision rate is
/// similar or slightly worse than FNV, but the speed of the hash function
/// itself is much higher because it works on up to 8 bytes at a time.
pub struct FxHasher {
hash: usize,
}
#[cfg(target_pointer_width = "32")]
const K: usize = 0x9e3779b9;
#[cfg(target_pointer_width = "64")]
const K: usize = 0x517cc1b727220a95;
impl Default for FxHasher {
#[inline]
fn default() -> Self {
Self { hash: 0 }
}
}
impl FxHasher {
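    /// Mix one machine word into the running hash: rotate the current hash, XOR in the new
    /// word, then multiply by the large odd constant `K` to spread the bits.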
#[inline]
fn add_to_hash(&mut self, i: usize) {
self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K);
}
}
impl Hasher for FxHasher {
#[inline]
fn write(&mut self, bytes: &[u8]) {
for byte in bytes {
let i = *byte;
self.add_to_hash(i as usize);
}
}
#[inline]
fn write_u8(&mut self, i: u8) {
self.add_to_hash(i as usize);
}
#[inline]
fn write_u16(&mut self, i: u16) {
self.add_to_hash(i as usize);
}
#[inline]
fn write_u32(&mut self, i: u32) {
self.add_to_hash(i as usize);
}
#[cfg(target_pointer_width = "32")]
#[inline]
fn write_u64(&mut self, i: u64) {
self.add_to_hash(i as usize);
self.add_to_hash((i >> 32) as usize);
}
#[cfg(target_pointer_width = "64")]
#[inline]
fn write_u64(&mut self, i: u64) {
self.add_to_hash(i as usize);
}
#[inline]
fn write_usize(&mut self, i: usize) {
self.add_to_hash(i);
}
#[inline]
fn finish(&self) -> u64 {
self.hash as u64
}
}


@@ -0,0 +1,266 @@
//! Cranelift instruction builder.
//!
//! A `Builder` provides a convenient interface for inserting instructions into a Cranelift
//! function. Many of its methods are generated from the meta language instruction definitions.
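//!
//! A short sketch of a builder in use, via a `FuncCursor` (see also the tests at the bottom
//! of this file):
//!
//! ```ignore
//! let mut pos = FuncCursor::new(&mut func);
//! pos.insert_ebb(ebb0);
//! let v0 = pos.ins().iconst(types::I32, 42); // generated per-opcode method
//! let v1 = pos.ins().iadd_imm(v0, 1);
//! ```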
use crate::ir;
use crate::ir::types;
use crate::ir::{DataFlowGraph, InstructionData};
use crate::ir::{Inst, Opcode, Type, Value};
use crate::isa;
/// Base trait for instruction builders.
///
/// The `InstBuilderBase` trait provides the basic functionality required by the methods of the
/// generated `InstBuilder` trait. These methods should not normally be used directly. Use the
/// methods in the `InstBuilder` trait instead.
///
/// Any data type that implements `InstBuilderBase` also gets all the methods of the `InstBuilder`
/// trait.
pub trait InstBuilderBase<'f>: Sized {
/// Get an immutable reference to the data flow graph that will hold the constructed
/// instructions.
fn data_flow_graph(&self) -> &DataFlowGraph;
/// Get a mutable reference to the data flow graph that will hold the constructed
/// instructions.
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;
/// Insert an instruction and return a reference to it, consuming the builder.
///
/// The result types may depend on a controlling type variable. For non-polymorphic
/// instructions with multiple results, pass `INVALID` for the `ctrl_typevar` argument.
fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph);
}
// Include trait code generated by `cranelift-codegen/meta-python/gen_instr.py`.
//
// This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per
// instruction format and per opcode.
include!(concat!(env!("OUT_DIR"), "/inst_builder.rs"));
/// Any type implementing `InstBuilderBase` gets all the `InstBuilder` methods for free.
impl<'f, T: InstBuilderBase<'f>> InstBuilder<'f> for T {}
/// Base trait for instruction inserters.
///
/// This is an alternative base trait for an instruction builder to implement.
///
/// An instruction inserter can be adapted into an instruction builder by wrapping it in an
/// `InsertBuilder`. This provides some common functionality for instruction builders that insert
/// new instructions, as opposed to the `ReplaceBuilder` which overwrites existing instructions.
pub trait InstInserterBase<'f>: Sized {
/// Get an immutable reference to the data flow graph.
fn data_flow_graph(&self) -> &DataFlowGraph;
/// Get a mutable reference to the data flow graph.
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;
/// Insert a new instruction which belongs to the DFG.
fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph;
}
use core::marker::PhantomData;
/// Builder that inserts an instruction at the current position.
///
/// An `InsertBuilder` is a wrapper for an `InstInserterBase` that turns it into an instruction
/// builder with some additional facilities for creating instructions that reuse existing values as
/// their results.
pub struct InsertBuilder<'f, IIB: InstInserterBase<'f>> {
inserter: IIB,
unused: PhantomData<&'f u32>,
}
impl<'f, IIB: InstInserterBase<'f>> InsertBuilder<'f, IIB> {
/// Create a new builder which inserts instructions at `pos`.
/// The `dfg` and `pos.layout` references should be from the same `Function`.
pub fn new(inserter: IIB) -> Self {
Self {
inserter,
unused: PhantomData,
}
}
/// Reuse result values in `reuse`.
///
/// Convert this builder into one that will reuse the provided result values instead of
/// allocating new ones. The provided values for reuse must not be attached to anything. Any
/// missing result values will be allocated as normal.
///
/// The `reuse` argument is expected to be an array of `Option<Value>`.
pub fn with_results<Array>(self, reuse: Array) -> InsertReuseBuilder<'f, IIB, Array>
where
Array: AsRef<[Option<Value>]>,
{
InsertReuseBuilder {
inserter: self.inserter,
reuse,
unused: PhantomData,
}
}
/// Reuse a single result value.
///
/// Convert this into a builder that will reuse `v` as the single result value. The reused
/// result value `v` must not be attached to anything.
///
/// This method should only be used when building an instruction with exactly one result. Use
/// `with_results()` for the more general case.
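    ///
    /// A sketch (see the `reuse_results` test below): after detaching `v0` from its defining
    /// instruction, `pos.ins().with_result(v0).iconst(I32, 3)` rebinds it as the result of the
    /// new `iconst`.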
pub fn with_result(self, v: Value) -> InsertReuseBuilder<'f, IIB, [Option<Value>; 1]> {
// TODO: Specialize this to return a different builder that just attaches `v` instead of
// calling `make_inst_results_reusing()`.
self.with_results([Some(v)])
}
}
impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, IIB> {
fn data_flow_graph(&self) -> &DataFlowGraph {
self.inserter.data_flow_graph()
}
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
self.inserter.data_flow_graph_mut()
}
fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
let inst;
{
let dfg = self.inserter.data_flow_graph_mut();
inst = dfg.make_inst(data);
dfg.make_inst_results(inst, ctrl_typevar);
}
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
}
}
/// Builder that inserts a new instruction like `InsertBuilder`, but reusing result values.
pub struct InsertReuseBuilder<'f, IIB, Array>
where
IIB: InstInserterBase<'f>,
Array: AsRef<[Option<Value>]>,
{
inserter: IIB,
reuse: Array,
unused: PhantomData<&'f u32>,
}
impl<'f, IIB, Array> InstBuilderBase<'f> for InsertReuseBuilder<'f, IIB, Array>
where
IIB: InstInserterBase<'f>,
Array: AsRef<[Option<Value>]>,
{
fn data_flow_graph(&self) -> &DataFlowGraph {
self.inserter.data_flow_graph()
}
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
self.inserter.data_flow_graph_mut()
}
fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
let inst;
{
let dfg = self.inserter.data_flow_graph_mut();
inst = dfg.make_inst(data);
// Make an `Iterator<Item = Option<Value>>`.
let ru = self.reuse.as_ref().iter().cloned();
dfg.make_inst_results_reusing(inst, ctrl_typevar, ru);
}
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
}
}
/// Instruction builder that replaces an existing instruction.
///
/// The inserted instruction will have the same `Inst` number as the old one.
///
/// If the old instruction still has result values attached, it is assumed that the new instruction
/// produces the same number and types of results. The old result values are preserved. If the
/// replacement instruction format does not support multiple results, the builder panics. It is a
/// bug to leave result values dangling.
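///
/// A sketch of typical use, going through `DataFlowGraph::replace` (as in the
/// `branches_and_jumps` test in `flowgraph.rs`):
///
/// ```ignore
/// func.dfg.replace(inst).brnz(cond, new_dest, &[]);
/// ```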
pub struct ReplaceBuilder<'f> {
dfg: &'f mut DataFlowGraph,
inst: Inst,
}
impl<'f> ReplaceBuilder<'f> {
/// Create a `ReplaceBuilder` that will overwrite `inst`.
pub fn new(dfg: &'f mut DataFlowGraph, inst: Inst) -> Self {
Self { dfg, inst }
}
}
impl<'f> InstBuilderBase<'f> for ReplaceBuilder<'f> {
fn data_flow_graph(&self) -> &DataFlowGraph {
self.dfg
}
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
self.dfg
}
fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
// Splat the new instruction on top of the old one.
self.dfg[self.inst] = data;
if !self.dfg.has_results(self.inst) {
// The old result values were either detached or non-existent.
// Construct new ones.
self.dfg.make_inst_results(self.inst, ctrl_typevar);
}
(self.inst, self.dfg)
}
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::condcodes::*;
use crate::ir::types::*;
use crate::ir::{Function, InstBuilder, ValueDef};
#[test]
fn types() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let arg0 = func.dfg.append_ebb_param(ebb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_ebb(ebb0);
// Explicit types.
let v0 = pos.ins().iconst(I32, 3);
assert_eq!(pos.func.dfg.value_type(v0), I32);
// Inferred from inputs.
let v1 = pos.ins().iadd(arg0, v0);
assert_eq!(pos.func.dfg.value_type(v1), I32);
// Formula.
let cmp = pos.ins().icmp(IntCC::Equal, arg0, v0);
assert_eq!(pos.func.dfg.value_type(cmp), B1);
}
#[test]
fn reuse_results() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let arg0 = func.dfg.append_ebb_param(ebb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_ebb(ebb0);
let v0 = pos.ins().iadd_imm(arg0, 17);
assert_eq!(pos.func.dfg.value_type(v0), I32);
let iadd = pos.prev_inst().unwrap();
assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iadd, 0));
// Detach v0 and reuse it for a different instruction.
pos.func.dfg.clear_results(iadd);
let v0b = pos.ins().with_result(v0).iconst(I32, 3);
assert_eq!(v0, v0b);
assert_eq!(pos.current_inst(), Some(iadd));
let iconst = pos.prev_inst().unwrap();
assert!(iadd != iconst);
assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iconst, 0));
}
}


@@ -0,0 +1,358 @@
//! Condition codes for the Cranelift code generator.
//!
//! A condition code here is an enumerated type that determines how to compare two numbers. There
//! are different rules for comparing integers and floating point numbers, so they use different
//! condition codes.
use core::fmt::{self, Display, Formatter};
use core::str::FromStr;
/// Common traits of condition codes.
pub trait CondCode: Copy {
/// Get the inverse condition code of `self`.
///
/// The inverse condition code produces the opposite result for all comparisons.
/// That is, `cmp CC, x, y` is true if and only if `cmp CC.inverse(), x, y` is false.
#[must_use]
fn inverse(self) -> Self;
/// Get the reversed condition code for `self`.
///
/// The reversed condition code produces the same result as swapping `x` and `y` in the
/// comparison. That is, `cmp CC, x, y` is the same as `cmp CC.reverse(), y, x`.
#[must_use]
fn reverse(self) -> Self;
}
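// For example, with the `IntCC` codes defined below: `SignedLessThan.inverse()` is
// `SignedGreaterThanOrEqual` (`x < y` is false exactly when `x >= y`), while
// `SignedLessThan.reverse()` is `SignedGreaterThan` (`x < y` iff `y > x`).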
/// Condition code for comparing integers.
///
/// This condition code is used by the `icmp` instruction to compare integer values. There are
/// separate codes for comparing the integers as signed or unsigned numbers where it makes a
/// difference.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum IntCC {
/// `==`.
Equal,
/// `!=`.
NotEqual,
/// Signed `<`.
SignedLessThan,
/// Signed `>=`.
SignedGreaterThanOrEqual,
/// Signed `>`.
SignedGreaterThan,
/// Signed `<=`.
SignedLessThanOrEqual,
/// Unsigned `<`.
UnsignedLessThan,
/// Unsigned `>=`.
UnsignedGreaterThanOrEqual,
/// Unsigned `>`.
UnsignedGreaterThan,
/// Unsigned `<=`.
UnsignedLessThanOrEqual,
}
impl CondCode for IntCC {
fn inverse(self) -> Self {
use self::IntCC::*;
match self {
Equal => NotEqual,
NotEqual => Equal,
SignedLessThan => SignedGreaterThanOrEqual,
SignedGreaterThanOrEqual => SignedLessThan,
SignedGreaterThan => SignedLessThanOrEqual,
SignedLessThanOrEqual => SignedGreaterThan,
UnsignedLessThan => UnsignedGreaterThanOrEqual,
UnsignedGreaterThanOrEqual => UnsignedLessThan,
UnsignedGreaterThan => UnsignedLessThanOrEqual,
UnsignedLessThanOrEqual => UnsignedGreaterThan,
}
}
fn reverse(self) -> Self {
use self::IntCC::*;
match self {
Equal => Equal,
NotEqual => NotEqual,
SignedGreaterThan => SignedLessThan,
SignedGreaterThanOrEqual => SignedLessThanOrEqual,
SignedLessThan => SignedGreaterThan,
SignedLessThanOrEqual => SignedGreaterThanOrEqual,
UnsignedGreaterThan => UnsignedLessThan,
UnsignedGreaterThanOrEqual => UnsignedLessThanOrEqual,
UnsignedLessThan => UnsignedGreaterThan,
UnsignedLessThanOrEqual => UnsignedGreaterThanOrEqual,
}
}
}
impl Display for IntCC {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::IntCC::*;
f.write_str(match *self {
Equal => "eq",
NotEqual => "ne",
SignedGreaterThan => "sgt",
SignedGreaterThanOrEqual => "sge",
SignedLessThan => "slt",
SignedLessThanOrEqual => "sle",
UnsignedGreaterThan => "ugt",
UnsignedGreaterThanOrEqual => "uge",
UnsignedLessThan => "ult",
UnsignedLessThanOrEqual => "ule",
})
}
}
impl FromStr for IntCC {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
use self::IntCC::*;
match s {
"eq" => Ok(Equal),
"ne" => Ok(NotEqual),
"sge" => Ok(SignedGreaterThanOrEqual),
"sgt" => Ok(SignedGreaterThan),
"sle" => Ok(SignedLessThanOrEqual),
"slt" => Ok(SignedLessThan),
"uge" => Ok(UnsignedGreaterThanOrEqual),
"ugt" => Ok(UnsignedGreaterThan),
"ule" => Ok(UnsignedLessThanOrEqual),
"ult" => Ok(UnsignedLessThan),
_ => Err(()),
}
}
}
/// Condition code for comparing floating point numbers.
///
/// This condition code is used by the `fcmp` instruction to compare floating point values. Two
/// IEEE floating point values relate in exactly one of four ways:
///
/// 1. `UN` - unordered when either value is NaN.
/// 2. `EQ` - equal numerical value.
/// 3. `LT` - `x` is less than `y`.
/// 4. `GT` - `x` is greater than `y`.
///
/// Note that `0.0` and `-0.0` relate as `EQ` because they both represent the number 0.
///
/// The condition codes described here are used to produce a single boolean value from the
/// comparison. The 14 condition codes here cover every possible combination of the relations
/// above except the impossible `!UN & !EQ & !LT & !GT` and the always-true `UN | EQ | LT | GT`.
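///
/// For example, `LessThan` is just `LT`, while `UnorderedOrLessThan` is `UN | LT`; the latter
/// is the inverse of `GreaterThanOrEqual` (`GT | EQ`).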
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum FloatCC {
/// EQ | LT | GT
Ordered,
/// UN
Unordered,
/// EQ
Equal,
/// The C '!=' operator is the inverse of '==': `NotEqual`.
/// UN | LT | GT
NotEqual,
/// LT | GT
OrderedNotEqual,
/// UN | EQ
UnorderedOrEqual,
/// LT
LessThan,
/// LT | EQ
LessThanOrEqual,
/// GT
GreaterThan,
/// GT | EQ
GreaterThanOrEqual,
/// UN | LT
UnorderedOrLessThan,
/// UN | LT | EQ
UnorderedOrLessThanOrEqual,
/// UN | GT
UnorderedOrGreaterThan,
/// UN | GT | EQ
UnorderedOrGreaterThanOrEqual,
}
impl CondCode for FloatCC {
fn inverse(self) -> Self {
use self::FloatCC::*;
match self {
Ordered => Unordered,
Unordered => Ordered,
Equal => NotEqual,
NotEqual => Equal,
OrderedNotEqual => UnorderedOrEqual,
UnorderedOrEqual => OrderedNotEqual,
LessThan => UnorderedOrGreaterThanOrEqual,
LessThanOrEqual => UnorderedOrGreaterThan,
GreaterThan => UnorderedOrLessThanOrEqual,
GreaterThanOrEqual => UnorderedOrLessThan,
UnorderedOrLessThan => GreaterThanOrEqual,
UnorderedOrLessThanOrEqual => GreaterThan,
UnorderedOrGreaterThan => LessThanOrEqual,
UnorderedOrGreaterThanOrEqual => LessThan,
}
}
fn reverse(self) -> Self {
use self::FloatCC::*;
match self {
Ordered => Ordered,
Unordered => Unordered,
Equal => Equal,
NotEqual => NotEqual,
OrderedNotEqual => OrderedNotEqual,
UnorderedOrEqual => UnorderedOrEqual,
LessThan => GreaterThan,
LessThanOrEqual => GreaterThanOrEqual,
GreaterThan => LessThan,
GreaterThanOrEqual => LessThanOrEqual,
UnorderedOrLessThan => UnorderedOrGreaterThan,
UnorderedOrLessThanOrEqual => UnorderedOrGreaterThanOrEqual,
UnorderedOrGreaterThan => UnorderedOrLessThan,
UnorderedOrGreaterThanOrEqual => UnorderedOrLessThanOrEqual,
}
}
}
impl Display for FloatCC {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::FloatCC::*;
f.write_str(match *self {
Ordered => "ord",
Unordered => "uno",
Equal => "eq",
NotEqual => "ne",
OrderedNotEqual => "one",
UnorderedOrEqual => "ueq",
LessThan => "lt",
LessThanOrEqual => "le",
GreaterThan => "gt",
GreaterThanOrEqual => "ge",
UnorderedOrLessThan => "ult",
UnorderedOrLessThanOrEqual => "ule",
UnorderedOrGreaterThan => "ugt",
UnorderedOrGreaterThanOrEqual => "uge",
})
}
}
impl FromStr for FloatCC {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
use self::FloatCC::*;
match s {
"ord" => Ok(Ordered),
"uno" => Ok(Unordered),
"eq" => Ok(Equal),
"ne" => Ok(NotEqual),
"one" => Ok(OrderedNotEqual),
"ueq" => Ok(UnorderedOrEqual),
"lt" => Ok(LessThan),
"le" => Ok(LessThanOrEqual),
"gt" => Ok(GreaterThan),
"ge" => Ok(GreaterThanOrEqual),
"ult" => Ok(UnorderedOrLessThan),
"ule" => Ok(UnorderedOrLessThanOrEqual),
"ugt" => Ok(UnorderedOrGreaterThan),
"uge" => Ok(UnorderedOrGreaterThanOrEqual),
_ => Err(()),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
static INT_ALL: [IntCC; 10] = [
IntCC::Equal,
IntCC::NotEqual,
IntCC::SignedLessThan,
IntCC::SignedGreaterThanOrEqual,
IntCC::SignedGreaterThan,
IntCC::SignedLessThanOrEqual,
IntCC::UnsignedLessThan,
IntCC::UnsignedGreaterThanOrEqual,
IntCC::UnsignedGreaterThan,
IntCC::UnsignedLessThanOrEqual,
];
#[test]
fn int_inverse() {
for r in &INT_ALL {
let cc = *r;
let inv = cc.inverse();
assert!(cc != inv);
assert_eq!(inv.inverse(), cc);
}
}
#[test]
fn int_reverse() {
for r in &INT_ALL {
let cc = *r;
let rev = cc.reverse();
assert_eq!(rev.reverse(), cc);
}
}
#[test]
fn int_display() {
for r in &INT_ALL {
let cc = *r;
assert_eq!(cc.to_string().parse(), Ok(cc));
}
assert_eq!("bogus".parse::<IntCC>(), Err(()));
}
static FLOAT_ALL: [FloatCC; 14] = [
FloatCC::Ordered,
FloatCC::Unordered,
FloatCC::Equal,
FloatCC::NotEqual,
FloatCC::OrderedNotEqual,
FloatCC::UnorderedOrEqual,
FloatCC::LessThan,
FloatCC::LessThanOrEqual,
FloatCC::GreaterThan,
FloatCC::GreaterThanOrEqual,
FloatCC::UnorderedOrLessThan,
FloatCC::UnorderedOrLessThanOrEqual,
FloatCC::UnorderedOrGreaterThan,
FloatCC::UnorderedOrGreaterThanOrEqual,
];
#[test]
fn float_inverse() {
for r in &FLOAT_ALL {
let cc = *r;
let inv = cc.inverse();
assert!(cc != inv);
assert_eq!(inv.inverse(), cc);
}
}
#[test]
fn float_reverse() {
for r in &FLOAT_ALL {
let cc = *r;
let rev = cc.reverse();
assert_eq!(rev.reverse(), cc);
}
}
#[test]
fn float_display() {
for r in &FLOAT_ALL {
let cc = *r;
assert_eq!(cc.to_string().parse(), Ok(cc));
}
assert_eq!("bogus".parse::<FloatCC>(), Err(()));
}
}

File diff suppressed because it is too large


@@ -0,0 +1,330 @@
//! Cranelift IR entity references.
//!
//! Instructions in Cranelift IR need to reference other entities in the function. This can be other
//! parts of the function like extended basic blocks or stack slots, or it can be external entities
//! that are declared in the function preamble in the text format.
//!
//! These entity references in instruction operands are not implemented as Rust references, both
//! because Rust's ownership and mutability rules make it difficult and because 64-bit pointers
//! take up a lot of space; we want a compact in-memory representation. Instead, entity
//! references are structs wrapping a `u32` index into a table in the `Function` main data
//! structure. There is a separate index type for each entity type, so we don't lose type safety.
//!
//! The `entities` module defines public types for the entity references along with constants
//! representing an invalid reference. We prefer to use `Option<EntityRef>` whenever possible, but
//! unfortunately that type is twice as large as the 32-bit index type on its own. Thus, compact
//! data structures use the `PackedOption<EntityRef>` representation, while function arguments and
//! return values prefer the more Rust-like `Option<EntityRef>` variant.
//!
//! The entity references all implement the `Display` trait in a way that matches the textual IR
//! format.
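//!
//! A sketch of the size trade-off (this mirrors the `memory` test below):
//!
//! ```ignore
//! use core::mem::size_of;
//!
//! assert_eq!(size_of::<Value>(), 4);
//! assert_eq!(size_of::<PackedOption<Value>>(), 4); // the reserved index encodes `None`
//! assert_eq!(size_of::<Option<Value>>(), 8);       // twice as large
//! ```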
use crate::entity::entity_impl;
use core::fmt;
use core::u32;
/// An opaque reference to an extended basic block in a function.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Ebb(u32);
entity_impl!(Ebb, "ebb");
impl Ebb {
/// Create a new EBB reference from its number. This corresponds to the `ebbNN` representation.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(Ebb(n))
} else {
None
}
}
}
/// An opaque reference to an SSA value.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Value(u32);
entity_impl!(Value, "v");
impl Value {
/// Create a value from its number representation.
/// This is the number in the `vNN` notation.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX / 2 {
Some(Value(n))
} else {
None
}
}
}
/// An opaque reference to an instruction in a function.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Inst(u32);
entity_impl!(Inst, "inst");
/// An opaque reference to a stack slot.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct StackSlot(u32);
entity_impl!(StackSlot, "ss");
impl StackSlot {
/// Create a new stack slot reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(StackSlot(n))
} else {
None
}
}
}
/// An opaque reference to a global value.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct GlobalValue(u32);
entity_impl!(GlobalValue, "gv");
impl GlobalValue {
/// Create a new global value reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(GlobalValue(n))
} else {
None
}
}
}
/// An opaque reference to a jump table.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct JumpTable(u32);
entity_impl!(JumpTable, "jt");
impl JumpTable {
/// Create a new jump table reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(JumpTable(n))
} else {
None
}
}
}
/// A reference to an external function.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct FuncRef(u32);
entity_impl!(FuncRef, "fn");
impl FuncRef {
/// Create a new external function reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(FuncRef(n))
} else {
None
}
}
}
/// A reference to a function signature.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct SigRef(u32);
entity_impl!(SigRef, "sig");
impl SigRef {
/// Create a new function signature reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(SigRef(n))
} else {
None
}
}
}
/// A reference to a heap.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Heap(u32);
entity_impl!(Heap, "heap");
impl Heap {
/// Create a new heap reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(Heap(n))
} else {
None
}
}
}
/// A reference to a table.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Table(u32);
entity_impl!(Table, "table");
impl Table {
/// Create a new table reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(Table(n))
} else {
None
}
}
}
/// A reference to any of the entities defined in this module.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub enum AnyEntity {
/// The whole function.
Function,
/// An extended basic block.
Ebb(Ebb),
/// An instruction.
Inst(Inst),
/// An SSA value.
Value(Value),
/// A stack slot.
StackSlot(StackSlot),
/// A Global value.
GlobalValue(GlobalValue),
/// A jump table.
JumpTable(JumpTable),
/// An external function.
FuncRef(FuncRef),
/// A function call signature.
SigRef(SigRef),
/// A heap.
Heap(Heap),
/// A table.
Table(Table),
}
impl fmt::Display for AnyEntity {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
AnyEntity::Function => write!(f, "function"),
AnyEntity::Ebb(r) => r.fmt(f),
AnyEntity::Inst(r) => r.fmt(f),
AnyEntity::Value(r) => r.fmt(f),
AnyEntity::StackSlot(r) => r.fmt(f),
AnyEntity::GlobalValue(r) => r.fmt(f),
AnyEntity::JumpTable(r) => r.fmt(f),
AnyEntity::FuncRef(r) => r.fmt(f),
AnyEntity::SigRef(r) => r.fmt(f),
AnyEntity::Heap(r) => r.fmt(f),
AnyEntity::Table(r) => r.fmt(f),
}
}
}
impl fmt::Debug for AnyEntity {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
(self as &fmt::Display).fmt(f)
}
}
impl From<Ebb> for AnyEntity {
fn from(r: Ebb) -> Self {
AnyEntity::Ebb(r)
}
}
impl From<Inst> for AnyEntity {
fn from(r: Inst) -> Self {
AnyEntity::Inst(r)
}
}
impl From<Value> for AnyEntity {
fn from(r: Value) -> Self {
AnyEntity::Value(r)
}
}
impl From<StackSlot> for AnyEntity {
fn from(r: StackSlot) -> Self {
AnyEntity::StackSlot(r)
}
}
impl From<GlobalValue> for AnyEntity {
fn from(r: GlobalValue) -> Self {
AnyEntity::GlobalValue(r)
}
}
impl From<JumpTable> for AnyEntity {
fn from(r: JumpTable) -> Self {
AnyEntity::JumpTable(r)
}
}
impl From<FuncRef> for AnyEntity {
fn from(r: FuncRef) -> Self {
AnyEntity::FuncRef(r)
}
}
impl From<SigRef> for AnyEntity {
fn from(r: SigRef) -> Self {
AnyEntity::SigRef(r)
}
}
impl From<Heap> for AnyEntity {
fn from(r: Heap) -> Self {
AnyEntity::Heap(r)
}
}
impl From<Table> for AnyEntity {
fn from(r: Table) -> Self {
AnyEntity::Table(r)
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::u32;
use std::string::ToString;
#[test]
fn value_with_number() {
assert_eq!(Value::with_number(0).unwrap().to_string(), "v0");
assert_eq!(Value::with_number(1).unwrap().to_string(), "v1");
assert_eq!(Value::with_number(u32::MAX / 2), None);
assert!(Value::with_number(u32::MAX / 2 - 1).is_some());
}
#[test]
fn memory() {
use crate::packed_option::PackedOption;
use core::mem;
// This is the whole point of `PackedOption`.
assert_eq!(
mem::size_of::<Value>(),
mem::size_of::<PackedOption<Value>>()
);
}
}


@@ -0,0 +1,405 @@
//! External function calls.
//!
//! To a Cranelift function, all functions are "external". Directly called functions must be
//! declared in the preamble, and all function calls must have a signature.
//!
//! This module declares the data types used to represent external functions and call signatures.
use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type};
use crate::isa::{CallConv, RegInfo, RegUnit};
use core::fmt;
use core::str::FromStr;
use std::vec::Vec;
/// Function signature.
///
/// The function signature describes the types of formal parameters and return values along with
/// other details that are needed to call a function correctly.
///
/// A signature can optionally include ISA-specific ABI information which specifies exactly how
/// arguments and return values are passed.
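///
/// A sketch of building and printing a signature (compare the `signatures` test below):
///
/// ```ignore
/// let mut sig = Signature::new(CallConv::SystemV);
/// sig.params.push(AbiParam::new(types::I32));
/// sig.returns.push(AbiParam::new(types::F32));
/// assert_eq!(sig.to_string(), "(i32) -> f32 system_v");
/// ```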
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Signature {
/// The arguments passed to the function.
pub params: Vec<AbiParam>,
/// Values returned from the function.
pub returns: Vec<AbiParam>,
/// Calling convention.
pub call_conv: CallConv,
}
impl Signature {
/// Create a new blank signature.
pub fn new(call_conv: CallConv) -> Self {
Self {
params: Vec::new(),
returns: Vec::new(),
call_conv,
}
}
/// Clear the signature so it is identical to a fresh one returned by `new()`.
pub fn clear(&mut self, call_conv: CallConv) {
self.params.clear();
self.returns.clear();
self.call_conv = call_conv;
}
/// Return an object that can display `self` with correct register names.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplaySignature<'a> {
DisplaySignature(self, regs.into())
}
/// Find the index of a presumed unique special-purpose parameter.
pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
self.params.iter().rposition(|arg| arg.purpose == purpose)
}
}
/// Wrapper type capable of displaying a `Signature` with correct register names.
pub struct DisplaySignature<'a>(&'a Signature, Option<&'a RegInfo>);
fn write_list(f: &mut fmt::Formatter, args: &[AbiParam], regs: Option<&RegInfo>) -> fmt::Result {
match args.split_first() {
None => {}
Some((first, rest)) => {
write!(f, "{}", first.display(regs))?;
for arg in rest {
write!(f, ", {}", arg.display(regs))?;
}
}
}
Ok(())
}
impl<'a> fmt::Display for DisplaySignature<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "(")?;
write_list(f, &self.0.params, self.1)?;
write!(f, ")")?;
if !self.0.returns.is_empty() {
write!(f, " -> ")?;
write_list(f, &self.0.returns, self.1)?;
}
write!(f, " {}", self.0.call_conv)
}
}
impl fmt::Display for Signature {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
/// Function parameter or return value descriptor.
///
/// This describes the value type being passed to or from a function along with flags that affect
/// how the argument is passed.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct AbiParam {
/// Type of the argument value.
pub value_type: Type,
/// Special purpose of argument, or `Normal`.
pub purpose: ArgumentPurpose,
/// Method for extending argument to a full register.
pub extension: ArgumentExtension,
/// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
/// been legalized.
pub location: ArgumentLoc,
}
impl AbiParam {
/// Create a parameter with default flags.
pub fn new(vt: Type) -> Self {
Self {
value_type: vt,
extension: ArgumentExtension::None,
purpose: ArgumentPurpose::Normal,
location: Default::default(),
}
}
/// Create a special-purpose parameter that is not (yet) bound to a specific register.
pub fn special(vt: Type, purpose: ArgumentPurpose) -> Self {
Self {
value_type: vt,
extension: ArgumentExtension::None,
purpose,
location: Default::default(),
}
}
/// Create a parameter for a special-purpose register.
pub fn special_reg(vt: Type, purpose: ArgumentPurpose, regunit: RegUnit) -> Self {
Self {
value_type: vt,
extension: ArgumentExtension::None,
purpose,
location: ArgumentLoc::Reg(regunit),
}
}
/// Convert `self` to a parameter with the `uext` flag set.
pub fn uext(self) -> Self {
debug_assert!(self.value_type.is_int(), "uext on {} arg", self.value_type);
Self {
extension: ArgumentExtension::Uext,
..self
}
}
/// Convert `self` to a parameter type with the `sext` flag set.
pub fn sext(self) -> Self {
debug_assert!(self.value_type.is_int(), "sext on {} arg", self.value_type);
Self {
extension: ArgumentExtension::Sext,
..self
}
}
/// Return an object that can display `self` with correct register names.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayAbiParam<'a> {
DisplayAbiParam(self, regs.into())
}
}
/// Wrapper type capable of displaying a `AbiParam` with correct register names.
pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAbiParam<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0.value_type)?;
match self.0.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => write!(f, " uext")?,
ArgumentExtension::Sext => write!(f, " sext")?,
}
if self.0.purpose != ArgumentPurpose::Normal {
write!(f, " {}", self.0.purpose)?;
}
if self.0.location.is_assigned() {
write!(f, " [{}]", self.0.location.display(self.1))?;
}
Ok(())
}
}
impl fmt::Display for AbiParam {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
/// Function argument extension options.
///
/// On some architectures, small integer function arguments are extended to the width of a
/// general-purpose register.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentExtension {
/// No extension, high bits are indeterminate.
None,
/// Unsigned extension: high bits in register are 0.
Uext,
/// Signed extension: high bits in register replicate sign bit.
Sext,
}
/// The special purpose of a function argument.
///
/// Function arguments and return values are used to pass user program values between functions,
/// but they are also used to represent special registers with significance to the ABI such as
/// frame pointers and callee-saved registers.
///
/// The argument purpose is used to indicate any special meaning of an argument or return value.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentPurpose {
/// A normal user program value passed to or from a function.
Normal,
/// Struct return pointer.
///
/// When a function needs to return more data than will fit in registers, the caller passes a
/// pointer to a memory location where the return value can be written. In some ABIs, this
/// struct return pointer is passed in a specific register.
///
/// This argument kind can also appear as a return value for ABIs that require a function with
/// a `StructReturn` pointer argument to also return that pointer in a register.
StructReturn,
/// The link register.
///
/// Most RISC architectures implement calls by saving the return address in a designated
/// register rather than pushing it on the stack. This is represented with a `Link` argument.
///
/// Similarly, some return instructions expect the return address in a register represented as
/// a `Link` return value.
Link,
/// The frame pointer.
///
/// This indicates the frame pointer register which has a special meaning in some ABIs.
///
/// The frame pointer appears as an argument and as a return value since it is a callee-saved
/// register.
FramePointer,
/// A callee-saved register.
///
/// Some calling conventions have registers that must be saved by the callee. These registers
/// are represented as `CalleeSaved` arguments and return values.
CalleeSaved,
/// A VM context pointer.
///
/// This is a pointer to a context struct containing details about the current sandbox. It is
/// used as a base pointer for `vmctx` global values.
VMContext,
/// A signature identifier.
///
/// This is a special-purpose argument used to identify the calling convention expected by the
/// caller in an indirect call. The callee can verify that the expected signature ID matches.
SignatureId,
/// A stack limit pointer.
///
    /// This is a pointer to a stack limit value that the current stack pointer is checked
    /// against. It can appear at most once in a signature.
StackLimit,
}
/// Text format names of the `ArgumentPurpose` variants.
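/// The order here must match the declaration order of the `ArgumentPurpose` variants, since
/// `Display` indexes this array with `*self as usize`.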
static PURPOSE_NAMES: [&str; 8] = [
"normal",
"sret",
"link",
"fp",
"csr",
"vmctx",
"sigid",
"stack_limit",
];
impl fmt::Display for ArgumentPurpose {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(PURPOSE_NAMES[*self as usize])
}
}
impl FromStr for ArgumentPurpose {
type Err = ();
fn from_str(s: &str) -> Result<Self, ()> {
match s {
"normal" => Ok(ArgumentPurpose::Normal),
"sret" => Ok(ArgumentPurpose::StructReturn),
"link" => Ok(ArgumentPurpose::Link),
"fp" => Ok(ArgumentPurpose::FramePointer),
"csr" => Ok(ArgumentPurpose::CalleeSaved),
"vmctx" => Ok(ArgumentPurpose::VMContext),
"sigid" => Ok(ArgumentPurpose::SignatureId),
"stack_limit" => Ok(ArgumentPurpose::StackLimit),
_ => Err(()),
}
}
}
/// An external function.
///
/// Information about a function that can be called directly with a direct `call` instruction.
#[derive(Clone, Debug)]
pub struct ExtFuncData {
/// Name of the external function.
pub name: ExternalName,
/// Call signature of function.
pub signature: SigRef,
/// Will this function be defined nearby, such that it will always be a certain distance away,
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
/// symbols meant to be preemptible cannot be considered colocated.
pub colocated: bool,
}
impl fmt::Display for ExtFuncData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.colocated {
write!(f, "colocated ")?;
}
write!(f, "{} {}", self.name, self.signature)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::types::{B8, F32, I32};
use std::string::ToString;
#[test]
fn argument_type() {
let t = AbiParam::new(I32);
assert_eq!(t.to_string(), "i32");
let mut t = t.uext();
assert_eq!(t.to_string(), "i32 uext");
assert_eq!(t.sext().to_string(), "i32 sext");
t.purpose = ArgumentPurpose::StructReturn;
assert_eq!(t.to_string(), "i32 uext sret");
}
#[test]
fn argument_purpose() {
let all_purpose = [
ArgumentPurpose::Normal,
ArgumentPurpose::StructReturn,
ArgumentPurpose::Link,
ArgumentPurpose::FramePointer,
ArgumentPurpose::CalleeSaved,
ArgumentPurpose::VMContext,
ArgumentPurpose::SignatureId,
ArgumentPurpose::StackLimit,
];
for (&e, &n) in all_purpose.iter().zip(PURPOSE_NAMES.iter()) {
assert_eq!(e.to_string(), n);
assert_eq!(Ok(e), n.parse());
}
}
#[test]
fn call_conv() {
for &cc in &[
CallConv::Fast,
CallConv::Cold,
CallConv::SystemV,
CallConv::WindowsFastcall,
CallConv::Baldrdash,
] {
assert_eq!(Ok(cc), cc.to_string().parse())
}
}
#[test]
fn signatures() {
let mut sig = Signature::new(CallConv::Baldrdash);
assert_eq!(sig.to_string(), "() baldrdash");
sig.params.push(AbiParam::new(I32));
assert_eq!(sig.to_string(), "(i32) baldrdash");
sig.returns.push(AbiParam::new(F32));
assert_eq!(sig.to_string(), "(i32) -> f32 baldrdash");
sig.params.push(AbiParam::new(I32.by(4).unwrap()));
assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 baldrdash");
sig.returns.push(AbiParam::new(B8));
assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, b8 baldrdash");
// Order does not matter.
sig.params[0].location = ArgumentLoc::Stack(24);
sig.params[1].location = ArgumentLoc::Stack(8);
// Writing ABI-annotated signatures.
assert_eq!(
sig.to_string(),
"(i32 [24], i32x4 [8]) -> f32, b8 baldrdash"
);
}
}


@@ -0,0 +1,163 @@
//! External names.
//!
//! These are identifiers for declaring entities defined outside the current
//! function. The name of an external declaration doesn't have any meaning to
//! Cranelift, which compiles functions independently.
use crate::ir::LibCall;
use core::cmp;
use core::fmt::{self, Write};
use core::str::FromStr;
const TESTCASE_NAME_LENGTH: usize = 16;
/// The name of an external declaration is either a reference into a user-defined symbol
/// table, or a short sequence of ascii bytes so that test cases do not have to keep track
/// of a symbol table.
///
/// External names are primarily used as keys by code using Cranelift to map
/// from a `cranelift_codegen::ir::FuncRef` or similar to additional associated
/// data.
///
/// External names can also serve as a primitive testing and debugging tool.
/// In particular, many `.clif` test files use function names to identify
/// functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExternalName {
/// A name in a user-defined symbol table. Cranelift does not interpret
/// these numbers in any way.
User {
/// Arbitrary.
namespace: u32,
/// Arbitrary.
index: u32,
},
    /// A test case function name of up to 16 ascii characters. This is
/// not intended to be used outside test cases.
TestCase {
/// How many of the bytes in `ascii` are valid?
length: u8,
/// Ascii bytes of the name.
ascii: [u8; TESTCASE_NAME_LENGTH],
},
/// A well-known runtime library function.
LibCall(LibCall),
}
impl ExternalName {
    /// Creates a new external name from a sequence of bytes. The caller is expected to
    /// guarantee that the bytes are only ascii alphanumeric or `_`. Anything beyond
    /// `TESTCASE_NAME_LENGTH` bytes is silently dropped.
///
/// # Examples
///
/// ```rust
/// # use cranelift_codegen::ir::ExternalName;
/// // Create `ExternalName` from a string.
/// let name = ExternalName::testcase("hello");
/// assert_eq!(name.to_string(), "%hello");
/// ```
pub fn testcase<T: AsRef<[u8]>>(v: T) -> Self {
let vec = v.as_ref();
let len = cmp::min(vec.len(), TESTCASE_NAME_LENGTH);
let mut bytes = [0u8; TESTCASE_NAME_LENGTH];
bytes[0..len].copy_from_slice(&vec[0..len]);
ExternalName::TestCase {
length: len as u8,
ascii: bytes,
}
}
/// Create a new external name from user-provided integer indices.
///
/// # Examples
/// ```rust
/// # use cranelift_codegen::ir::ExternalName;
/// // Create `ExternalName` from integer indices
/// let name = ExternalName::user(123, 456);
/// assert_eq!(name.to_string(), "u123:456");
/// ```
pub fn user(namespace: u32, index: u32) -> Self {
ExternalName::User { namespace, index }
}
}
impl Default for ExternalName {
fn default() -> Self {
Self::user(0, 0)
}
}
impl fmt::Display for ExternalName {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ExternalName::User { namespace, index } => write!(f, "u{}:{}", namespace, index),
ExternalName::TestCase { length, ascii } => {
f.write_char('%')?;
for byte in ascii.iter().take(length as usize) {
f.write_char(*byte as char)?;
}
Ok(())
}
ExternalName::LibCall(lc) => write!(f, "%{}", lc),
}
}
}
impl FromStr for ExternalName {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Try to parse as a libcall name, otherwise it's a test case.
match s.parse() {
Ok(lc) => Ok(ExternalName::LibCall(lc)),
Err(_) => Ok(Self::testcase(s.as_bytes())),
}
}
}
#[cfg(test)]
mod tests {
use super::ExternalName;
use crate::ir::LibCall;
use core::u32;
use std::string::ToString;
#[test]
fn display_testcase() {
assert_eq!(ExternalName::testcase("").to_string(), "%");
assert_eq!(ExternalName::testcase("x").to_string(), "%x");
assert_eq!(ExternalName::testcase("x_1").to_string(), "%x_1");
assert_eq!(
ExternalName::testcase("longname12345678").to_string(),
"%longname12345678"
);
// Constructor will silently drop bytes beyond the 16th
assert_eq!(
ExternalName::testcase("longname123456789").to_string(),
"%longname12345678"
);
}
#[test]
fn display_user() {
assert_eq!(ExternalName::user(0, 0).to_string(), "u0:0");
assert_eq!(ExternalName::user(1, 1).to_string(), "u1:1");
assert_eq!(
ExternalName::user(u32::MAX, u32::MAX).to_string(),
"u4294967295:4294967295"
);
}
#[test]
fn parsing() {
assert_eq!(
"FloorF32".parse(),
Ok(ExternalName::LibCall(LibCall::FloorF32))
);
assert_eq!(
ExternalName::LibCall(LibCall::FloorF32).to_string(),
"%FloorF32"
);
}
}


@@ -0,0 +1,252 @@
//! Intermediate representation of a function.
//!
//! The `Function` struct defined in this module owns all of its extended basic blocks and
//! instructions.
use crate::binemit::CodeOffset;
use crate::entity::{PrimaryMap, SecondaryMap};
use crate::ir;
use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature};
use crate::ir::{
Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, JumpTable,
JumpTableData, SigRef, StackSlot, StackSlotData, Table, TableData,
};
use crate::ir::{EbbOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations};
use crate::ir::{JumpTableOffsets, JumpTables};
use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
use crate::regalloc::RegDiversions;
use crate::write::write_function;
use core::fmt;
/// A function.
///
/// Functions can be cloned, but it is not a very fast operation.
/// The clone will have all the same entity numbers as the original.
#[derive(Clone)]
pub struct Function {
/// Name of this function. Mostly used by `.clif` files.
pub name: ExternalName,
/// Signature of this function.
pub signature: Signature,
/// Stack slots allocated in this function.
pub stack_slots: StackSlots,
/// Global values referenced.
pub global_values: PrimaryMap<ir::GlobalValue, ir::GlobalValueData>,
/// Heaps referenced.
pub heaps: PrimaryMap<ir::Heap, ir::HeapData>,
/// Tables referenced.
pub tables: PrimaryMap<ir::Table, ir::TableData>,
/// Jump tables used in this function.
pub jump_tables: JumpTables,
/// Data flow graph containing the primary definition of all instructions, EBBs and values.
pub dfg: DataFlowGraph,
/// Layout of EBBs and instructions in the function body.
pub layout: Layout,
/// Encoding recipe and bits for the legal instructions.
/// Illegal instructions have the `Encoding::default()` value.
pub encodings: InstEncodings,
/// Location assigned to every value.
pub locations: ValueLocations,
/// Code offsets of the EBB headers.
///
/// This information is only transiently available after the `binemit::relax_branches` function
/// computes it, and it can easily be recomputed by calling that function. It is not included
/// in the textual IR format.
pub offsets: EbbOffsets,
/// Code offsets of Jump Table headers.
pub jt_offsets: JumpTableOffsets,
/// Source locations.
///
/// Track the original source location for each instruction. The source locations are not
/// interpreted by Cranelift, only preserved.
pub srclocs: SourceLocs,
}
impl Function {
/// Create a function with the given name and signature.
pub fn with_name_signature(name: ExternalName, sig: Signature) -> Self {
Self {
name,
signature: sig,
stack_slots: StackSlots::new(),
global_values: PrimaryMap::new(),
heaps: PrimaryMap::new(),
tables: PrimaryMap::new(),
jump_tables: PrimaryMap::new(),
dfg: DataFlowGraph::new(),
layout: Layout::new(),
encodings: SecondaryMap::new(),
locations: SecondaryMap::new(),
offsets: SecondaryMap::new(),
jt_offsets: SecondaryMap::new(),
srclocs: SecondaryMap::new(),
}
}
/// Clear all data structures in this function.
pub fn clear(&mut self) {
self.signature.clear(CallConv::Fast);
self.stack_slots.clear();
self.global_values.clear();
self.heaps.clear();
self.tables.clear();
self.jump_tables.clear();
self.dfg.clear();
self.layout.clear();
self.encodings.clear();
self.locations.clear();
        self.offsets.clear();
        // `jt_offsets` is transient like `offsets`; clear it too so no stale data survives.
        self.jt_offsets.clear();
self.srclocs.clear();
}
/// Create a new empty, anonymous function with a Fast calling convention.
pub fn new() -> Self {
Self::with_name_signature(ExternalName::default(), Signature::new(CallConv::Fast))
}
/// Creates a jump table in the function, to be used by `br_table` instructions.
pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable {
self.jump_tables.push(data)
}
/// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and
/// `stack_addr` instructions.
pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot {
self.stack_slots.push(data)
}
/// Adds a signature which can later be used to declare an external function import.
pub fn import_signature(&mut self, signature: Signature) -> SigRef {
self.dfg.signatures.push(signature)
}
/// Declare an external function import.
pub fn import_function(&mut self, data: ExtFuncData) -> FuncRef {
self.dfg.ext_funcs.push(data)
}
/// Declares a global value accessible to the function.
pub fn create_global_value(&mut self, data: GlobalValueData) -> GlobalValue {
self.global_values.push(data)
}
/// Declares a heap accessible to the function.
pub fn create_heap(&mut self, data: HeapData) -> Heap {
self.heaps.push(data)
}
/// Declares a table accessible to the function.
pub fn create_table(&mut self, data: TableData) -> Table {
self.tables.push(data)
}
/// Return an object that can display this function with correct ISA-specific annotations.
pub fn display<'a, I: Into<Option<&'a TargetIsa>>>(&'a self, isa: I) -> DisplayFunction<'a> {
DisplayFunction(self, isa.into())
}
/// Find a presumed unique special-purpose function parameter value.
///
/// Returns the value of the last `purpose` parameter, or `None` if no such parameter exists.
pub fn special_param(&self, purpose: ir::ArgumentPurpose) -> Option<ir::Value> {
let entry = self.layout.entry_block().expect("Function is empty");
self.signature
.special_param_index(purpose)
.map(|i| self.dfg.ebb_params(entry)[i])
}
/// Get an iterator over the instructions in `ebb`, including offsets and encoded instruction
/// sizes.
///
/// The iterator returns `(offset, inst, size)` tuples, where `offset` is the offset in bytes
/// from the beginning of the function to the instruction, and `size` is the size of the
/// instruction in bytes, or 0 for unencoded instructions.
///
/// This function can only be used after the code layout has been computed by the
/// `binemit::relax_branches()` function.
pub fn inst_offsets<'a>(&'a self, ebb: Ebb, encinfo: &EncInfo) -> InstOffsetIter<'a> {
assert!(
!self.offsets.is_empty(),
"Code layout must be computed first"
);
InstOffsetIter {
encinfo: encinfo.clone(),
func: self,
divert: RegDiversions::new(),
encodings: &self.encodings,
offset: self.offsets[ebb],
iter: self.layout.ebb_insts(ebb),
}
}
/// Wrapper around `encode` which assigns `inst` the resulting encoding.
pub fn update_encoding(&mut self, inst: ir::Inst, isa: &TargetIsa) -> Result<(), Legalize> {
self.encode(inst, isa).map(|e| self.encodings[inst] = e)
}
/// Wrapper around `TargetIsa::encode` for encoding an existing instruction
/// in the `Function`.
pub fn encode(&self, inst: ir::Inst, isa: &TargetIsa) -> Result<Encoding, Legalize> {
isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
}
}
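// A minimal usage sketch of the preamble builder methods above. It exercises only
// items defined in this crate; the entity names `ss0`/`jt0` follow the numbering
// scheme used throughout the textual IR.
#[cfg(test)]
mod function_example {
    use super::*;
    use crate::ir::{JumpTableData, StackSlotData, StackSlotKind};
    use std::string::ToString;

    #[test]
    fn build_preamble() {
        // An anonymous function using the default (fast) calling convention.
        let mut func = Function::new();
        // Entities declared in the preamble are numbered in creation order.
        let ss = func.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 16));
        let jt = func.create_jump_table(JumpTableData::new());
        assert_eq!(ss.to_string(), "ss0");
        assert_eq!(jt.to_string(), "jt0");
    }
}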
/// Wrapper type capable of displaying a `Function` with correct ISA annotations.
pub struct DisplayFunction<'a>(&'a Function, Option<&'a TargetIsa>);
impl<'a> fmt::Display for DisplayFunction<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write_function(fmt, self.0, self.1)
}
}
impl fmt::Display for Function {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write_function(fmt, self, None)
}
}
impl fmt::Debug for Function {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write_function(fmt, self, None)
}
}
/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`.
pub struct InstOffsetIter<'a> {
encinfo: EncInfo,
divert: RegDiversions,
func: &'a Function,
encodings: &'a InstEncodings,
offset: CodeOffset,
iter: ir::layout::Insts<'a>,
}
impl<'a> Iterator for InstOffsetIter<'a> {
type Item = (CodeOffset, ir::Inst, CodeOffset);
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(|inst| {
self.divert.apply(&self.func.dfg[inst]);
let byte_size =
self.encinfo
.byte_size(self.encodings[inst], inst, &self.divert, self.func);
let offset = self.offset;
self.offset += byte_size;
(offset, inst, byte_size)
})
}
}

View File

@@ -0,0 +1,132 @@
//! Global values.
use crate::ir::immediates::{Imm64, Offset32};
use crate::ir::{ExternalName, GlobalValue, Type};
use crate::isa::TargetIsa;
use core::fmt;
/// Information about a global value declaration.
#[derive(Clone)]
pub enum GlobalValueData {
/// Value is the address of the VM context struct.
VMContext,
/// Value is pointed to by another global value.
///
/// The `base` global value is assumed to contain a pointer. This global value is computed
/// by loading from memory at that pointer value. The memory must be accessible, and
/// naturally aligned to hold a value of the type. The data at this address is assumed
/// to never change while the current function is executing.
Load {
/// The base pointer global value.
base: GlobalValue,
/// Offset added to the base pointer before doing the load.
offset: Offset32,
/// Type of the loaded value.
global_type: Type,
/// Specifies whether the memory that this refers to is readonly, allowing for the
/// elimination of redundant loads.
readonly: bool,
},
/// Value is an offset from another global value.
IAddImm {
/// The base pointer global value.
base: GlobalValue,
/// Byte offset to be added to the value.
offset: Imm64,
/// Type of the iadd.
global_type: Type,
},
/// Value is symbolic, meaning it's a name which will be resolved to an
/// actual value later (e.g., by linking). Cranelift itself does not interpret
/// this name; it's used by embedders to link with other data structures.
///
/// For now, symbolic values always have pointer type, and represent
/// addresses, however in the future they could be used to represent other
/// things as well.
Symbol {
/// The symbolic name.
name: ExternalName,
/// Offset from the symbol. This can be used instead of IAddImm to represent folding an
/// offset into a symbol.
offset: Imm64,
/// Will this symbol be defined nearby, such that it will always be a certain distance
/// away, after linking? If so, references to it can avoid going through a GOT. Note that
/// symbols meant to be preemptible cannot be colocated.
colocated: bool,
},
}
impl GlobalValueData {
/// Assume that `self` is a `GlobalValueData::Symbol` and return its name.
pub fn symbol_name(&self) -> &ExternalName {
match *self {
GlobalValueData::Symbol { ref name, .. } => name,
_ => panic!("only symbols have names"),
}
}
/// Return the type of this global.
pub fn global_type(&self, isa: &TargetIsa) -> Type {
match *self {
GlobalValueData::VMContext { .. } | GlobalValueData::Symbol { .. } => {
isa.pointer_type()
}
GlobalValueData::IAddImm { global_type, .. }
| GlobalValueData::Load { global_type, .. } => global_type,
}
}
}
impl fmt::Display for GlobalValueData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
GlobalValueData::VMContext => write!(f, "vmctx"),
GlobalValueData::Load {
base,
offset,
global_type,
readonly,
} => write!(
f,
"load.{} notrap aligned {}{}{}",
global_type,
if readonly { "readonly " } else { "" },
base,
offset
),
GlobalValueData::IAddImm {
global_type,
base,
offset,
} => write!(f, "iadd_imm.{} {}, {}", global_type, base, offset),
GlobalValueData::Symbol {
ref name,
offset,
colocated,
} => {
if colocated {
write!(f, "colocated ")?;
}
write!(f, "symbol {}", name)?;
let offset_val: i64 = offset.into();
if offset_val > 0 {
write!(f, "+")?;
}
if offset_val != 0 {
write!(f, "{}", offset)?;
}
Ok(())
}
}
}
}
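// A small sketch of the `Display` impl above. `ExternalName::testcase` is assumed
// to be available from this crate's `extname` module for building throwaway names;
// positive offsets are printed with an explicit `+` sign.
#[cfg(test)]
mod globalvalue_example {
    use super::*;
    use std::string::ToString;

    #[test]
    fn symbol_display() {
        let gv = GlobalValueData::Symbol {
            name: ExternalName::testcase("sym"),
            offset: Imm64::new(8),
            colocated: false,
        };
        assert_eq!(gv.to_string(), "symbol %sym+8");
    }
}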

View File

@@ -0,0 +1,62 @@
//! Heaps.
use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
/// Information about a heap declaration.
#[derive(Clone)]
pub struct HeapData {
/// The address of the start of the heap's storage.
pub base: GlobalValue,
/// Guaranteed minimum heap size in bytes. Heap accesses before `min_size` don't need bounds
/// checking.
pub min_size: Uimm64,
/// Size in bytes of the offset-guard pages following the heap.
pub offset_guard_size: Uimm64,
/// Heap style, with additional style-specific info.
pub style: HeapStyle,
/// The index type for the heap.
pub index_type: Type,
}
/// Style of heap including style-specific information.
#[derive(Clone)]
pub enum HeapStyle {
/// A dynamic heap can be relocated to a different base address when it is grown.
Dynamic {
/// Global value providing the current bound of the heap in bytes.
bound_gv: GlobalValue,
},
/// A static heap has a fixed base address and a number of not-yet-allocated pages before the
/// offset-guard pages.
Static {
/// Heap bound in bytes. The offset-guard pages are allocated after the bound.
bound: Uimm64,
},
}
impl fmt::Display for HeapData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(match self.style {
HeapStyle::Dynamic { .. } => "dynamic",
HeapStyle::Static { .. } => "static",
})?;
write!(f, " {}, min {}", self.base, self.min_size)?;
match self.style {
HeapStyle::Dynamic { bound_gv } => write!(f, ", bound {}", bound_gv)?,
HeapStyle::Static { bound } => write!(f, ", bound {}", bound)?,
}
write!(
f,
", offset_guard {}, index_type {}",
self.offset_guard_size, self.index_type
)
}
}
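// An illustrative sketch of the textual form produced by the `Display` impl above.
// `GlobalValue::new` (via `EntityRef`) and the small `Uimm64` values are chosen
// purely for demonstration.
#[cfg(test)]
mod heap_example {
    use super::*;
    use crate::entity::EntityRef;
    use crate::ir::types;
    use std::string::ToString;

    #[test]
    fn static_heap_display() {
        let heap = HeapData {
            base: GlobalValue::new(0),
            min_size: Uimm64::new(0),
            offset_guard_size: Uimm64::new(4096),
            style: HeapStyle::Static {
                bound: Uimm64::new(4096),
            },
            index_type: types::I32,
        };
        assert_eq!(
            heap.to_string(),
            "static gv0, min 0, bound 4096, offset_guard 4096, index_type i32"
        );
    }
}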

File diff suppressed because it is too large

View File

@@ -0,0 +1,702 @@
//! Instruction formats and opcodes.
//!
//! The `instructions` module contains definitions for instruction formats, opcodes, and the
//! in-memory representation of IR instructions.
//!
//! A large part of this module is auto-generated from the instruction descriptions in the meta
//! directory.
use core::fmt::{self, Display, Formatter};
use core::ops::{Deref, DerefMut};
use core::str::FromStr;
use std::vec::Vec;
use crate::ir;
use crate::ir::types;
use crate::ir::{Ebb, FuncRef, JumpTable, SigRef, Type, Value};
use crate::isa;
use crate::bitset::BitSet;
use crate::entity;
use crate::ref_slice::{ref_slice, ref_slice_mut};
/// Some instructions use an external list of argument values because there is not enough space in
/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in
/// `dfg.value_lists`.
pub type ValueList = entity::EntityList<Value>;
/// Memory pool for holding value lists. See `ValueList`.
pub type ValueListPool = entity::ListPool<Value>;
// Include code generated by `cranelift-codegen/meta-python/gen_instr.py`. This file contains:
//
// - The `pub enum InstructionFormat` enum with all the instruction formats.
// - The `pub enum InstructionData` enum with all the instruction data fields.
// - The `pub enum Opcode` definition with all known opcodes,
// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table.
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
//
// For value type constraints:
//
// - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table.
// - The `const TYPE_SETS : [ValueTypeSet; N]` table.
// - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table.
//
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
impl Display for Opcode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}", opcode_name(*self))
}
}
impl Opcode {
/// Get the instruction format for this opcode.
pub fn format(self) -> InstructionFormat {
OPCODE_FORMAT[self as usize - 1]
}
/// Get the constraint descriptor for this opcode.
/// Panics if this is called on an invalid opcode.
pub fn constraints(self) -> OpcodeConstraints {
OPCODE_CONSTRAINTS[self as usize - 1]
}
}
// This trait really belongs in cranelift-reader where it is used by the `.clif` file parser, but since
// it critically depends on the `opcode_name()` function which is needed here anyway, it lives in
// this module. This also saves us from running the build script twice to generate code for the two
// separate crates.
impl FromStr for Opcode {
type Err = &'static str;
/// Parse an Opcode name from a string.
fn from_str(s: &str) -> Result<Self, &'static str> {
use crate::constant_hash::{probe, simple_hash, Table};
impl<'a> Table<&'a str> for [Option<Opcode>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<&'a str> {
self[idx].map(opcode_name)
}
}
match probe::<&str, [Option<Self>]>(&OPCODE_HASH_TABLE, s, simple_hash(s)) {
Err(_) => Err("Unknown opcode"),
// We unwrap here because probe() should have ensured that the entry
// at this index is not None.
Ok(i) => Ok(OPCODE_HASH_TABLE[i].unwrap()),
}
}
}
/// A variable list of `Value` operands used for function call arguments and passing arguments to
/// basic blocks.
#[derive(Clone, Debug)]
pub struct VariableArgs(Vec<Value>);
impl VariableArgs {
/// Create an empty argument list.
pub fn new() -> Self {
VariableArgs(Vec::new())
}
/// Add an argument to the end.
pub fn push(&mut self, v: Value) {
self.0.push(v)
}
/// Check if the list is empty.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Convert this to a value list in `pool` with `fixed` prepended.
pub fn into_value_list(self, fixed: &[Value], pool: &mut ValueListPool) -> ValueList {
let mut vlist = ValueList::default();
vlist.extend(fixed.iter().cloned(), pool);
vlist.extend(self.0, pool);
vlist
}
}
// Coerce `VariableArgs` into a `&[Value]` slice.
impl Deref for VariableArgs {
type Target = [Value];
fn deref(&self) -> &[Value] {
&self.0
}
}
impl DerefMut for VariableArgs {
fn deref_mut(&mut self) -> &mut [Value] {
&mut self.0
}
}
impl Display for VariableArgs {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
for (i, val) in self.0.iter().enumerate() {
if i == 0 {
write!(fmt, "{}", val)?;
} else {
write!(fmt, ", {}", val)?;
}
}
Ok(())
}
}
impl Default for VariableArgs {
fn default() -> Self {
Self::new()
}
}
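// A brief sketch of `VariableArgs` in use: arguments are collected, displayed
// comma-separated, and finally interned into a pooled `ValueList` with a fixed
// argument prepended. Only items from this module and the entity crate are used.
#[cfg(test)]
mod variable_args_example {
    use super::*;
    use crate::entity::EntityRef;
    use std::string::ToString;

    #[test]
    fn collect_and_intern() {
        let mut args = VariableArgs::new();
        args.push(Value::new(1));
        args.push(Value::new(2));
        assert_eq!(args.to_string(), "v1, v2");

        // Prepend one fixed argument while converting to a `ValueList`.
        let mut pool = ValueListPool::new();
        let vlist = args.into_value_list(&[Value::new(0)], &mut pool);
        assert_eq!(
            vlist.as_slice(&pool),
            &[Value::new(0), Value::new(1), Value::new(2)]
        );
    }
}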
/// Analyzing an instruction.
///
/// Avoid large matches on instruction formats by using the methods defined here to examine
/// instructions.
impl InstructionData {
/// Return information about the destination of a branch or jump instruction.
///
/// Any instruction that can transfer control to another EBB reveals its possible destinations
/// here.
pub fn analyze_branch<'a>(&'a self, pool: &'a ValueListPool) -> BranchInfo<'a> {
match *self {
InstructionData::Jump {
destination,
ref args,
..
} => BranchInfo::SingleDest(destination, args.as_slice(pool)),
InstructionData::BranchInt {
destination,
ref args,
..
}
| InstructionData::BranchFloat {
destination,
ref args,
..
}
| InstructionData::Branch {
destination,
ref args,
..
} => BranchInfo::SingleDest(destination, &args.as_slice(pool)[1..]),
InstructionData::BranchIcmp {
destination,
ref args,
..
} => BranchInfo::SingleDest(destination, &args.as_slice(pool)[2..]),
InstructionData::BranchTable {
table, destination, ..
} => BranchInfo::Table(table, Some(destination)),
InstructionData::IndirectJump { table, .. } => BranchInfo::Table(table, None),
_ => {
debug_assert!(!self.opcode().is_branch());
BranchInfo::NotABranch
}
}
}
/// Get the single destination of this branch instruction, if it is a single destination
/// branch or jump.
///
/// Multi-destination branches like `br_table` return `None`.
pub fn branch_destination(&self) -> Option<Ebb> {
match *self {
InstructionData::Jump { destination, .. }
| InstructionData::Branch { destination, .. }
| InstructionData::BranchInt { destination, .. }
| InstructionData::BranchFloat { destination, .. }
| InstructionData::BranchIcmp { destination, .. } => Some(destination),
InstructionData::BranchTable { .. } | InstructionData::IndirectJump { .. } => None,
_ => {
debug_assert!(!self.opcode().is_branch());
None
}
}
}
/// Get a mutable reference to the single destination of this branch instruction, if it is a
/// single destination branch or jump.
///
/// Multi-destination branches like `br_table` return `None`.
pub fn branch_destination_mut(&mut self) -> Option<&mut Ebb> {
match *self {
InstructionData::Jump {
ref mut destination,
..
}
| InstructionData::Branch {
ref mut destination,
..
}
| InstructionData::BranchInt {
ref mut destination,
..
}
| InstructionData::BranchFloat {
ref mut destination,
..
}
| InstructionData::BranchIcmp {
ref mut destination,
..
} => Some(destination),
InstructionData::BranchTable { .. } => None,
_ => {
debug_assert!(!self.opcode().is_branch());
None
}
}
}
/// Return information about a call instruction.
///
/// Any instruction that can call another function reveals its call signature here.
pub fn analyze_call<'a>(&'a self, pool: &'a ValueListPool) -> CallInfo<'a> {
match *self {
InstructionData::Call {
func_ref, ref args, ..
} => CallInfo::Direct(func_ref, args.as_slice(pool)),
InstructionData::CallIndirect {
sig_ref, ref args, ..
} => CallInfo::Indirect(sig_ref, &args.as_slice(pool)[1..]),
_ => {
debug_assert!(!self.opcode().is_call());
CallInfo::NotACall
}
}
}
}
/// Information about branch and jump instructions.
pub enum BranchInfo<'a> {
/// This is not a branch or jump instruction.
/// This instruction will not transfer control to another EBB in the function, but it may still
/// affect control flow by returning or trapping.
NotABranch,
/// This is a branch or jump to a single destination EBB, possibly taking value arguments.
SingleDest(Ebb, &'a [Value]),
/// This is a jump table branch which can have many destination EBBs and maybe one default EBB.
Table(JumpTable, Option<Ebb>),
}
/// Information about call instructions.
pub enum CallInfo<'a> {
/// This is not a call instruction.
NotACall,
/// This is a direct call to an external function declared in the preamble. See
/// `DataFlowGraph.ext_funcs`.
Direct(FuncRef, &'a [Value]),
/// This is an indirect call with the specified signature. See `DataFlowGraph.signatures`.
Indirect(SigRef, &'a [Value]),
}
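// A hand-built `Jump` run through `analyze_branch`, as a sketch of the `BranchInfo`
// API above. The field names follow the generated `Jump` instruction format
// (`opcode`, `destination`, `args`), which lives in the build-script output.
#[cfg(test)]
mod branch_info_example {
    use super::*;
    use crate::entity::EntityRef;

    #[test]
    fn jump_single_dest() {
        let pool = ValueListPool::new();
        let data = InstructionData::Jump {
            opcode: Opcode::Jump,
            destination: Ebb::new(7),
            args: ValueList::default(),
        };
        match data.analyze_branch(&pool) {
            BranchInfo::SingleDest(dest, args) => {
                assert_eq!(dest, Ebb::new(7));
                assert!(args.is_empty());
            }
            _ => panic!("expected a single-destination branch"),
        }
    }
}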
/// Value type constraints for a given opcode.
///
/// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and
/// results are not determined by the format. Every `Opcode` has an associated
/// `OpcodeConstraints` object that provides the missing details.
#[derive(Clone, Copy)]
pub struct OpcodeConstraints {
/// Flags for this opcode encoded as a bit field:
///
/// Bits 0-2:
/// Number of fixed result values. This does not include `variable_args` results as are
/// produced by call instructions.
///
/// Bit 3:
/// This opcode is polymorphic and the controlling type variable can be inferred from the
/// designated input operand. This is the `typevar_operand` index given to the
/// `InstructionFormat` meta language object. When this bit is not set, the controlling
/// type variable must be the first output value instead.
///
/// Bit 4:
/// This opcode is polymorphic and the controlling type variable does *not* appear as the
/// first result type.
///
/// Bits 5-7:
/// Number of fixed value arguments. The minimum required number of value operands.
flags: u8,
/// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`.
typeset_offset: u8,
/// Offset into `OPERAND_CONSTRAINTS` table of the descriptors for this opcode. The first
/// `num_fixed_results()` entries describe the result constraints, then follows constraints for
/// the fixed `Value` input operands. (`num_fixed_value_arguments()` of them).
constraint_offset: u16,
}
impl OpcodeConstraints {
/// Can the controlling type variable for this opcode be inferred from the designated value
/// input operand?
/// This also implies that this opcode is polymorphic.
pub fn use_typevar_operand(self) -> bool {
(self.flags & 0x8) != 0
}
/// Is it necessary to look at the designated value input operand in order to determine the
/// controlling type variable, or is it good enough to use the first return type?
///
/// Most polymorphic instructions produce a single result with the type of the controlling type
/// variable. A few polymorphic instructions either don't produce any results, or produce
/// results with a fixed type. These instructions return `true`.
pub fn requires_typevar_operand(self) -> bool {
(self.flags & 0x10) != 0
}
/// Get the number of *fixed* result values produced by this opcode.
/// This does not include `variable_args` produced by calls.
pub fn num_fixed_results(self) -> usize {
(self.flags & 0x7) as usize
}
/// Get the number of *fixed* input values required by this opcode.
///
/// This does not include `variable_args` arguments on call and branch instructions.
///
/// The number of fixed input values is usually implied by the instruction format, but
/// instruction formats that use a `ValueList` put both fixed and variable arguments in the
/// list. This method returns the *minimum* number of values required in the value list.
pub fn num_fixed_value_arguments(self) -> usize {
((self.flags >> 5) & 0x7) as usize
}
/// Get the offset into `TYPE_SETS` for the controlling type variable.
/// Returns `None` if the instruction is not polymorphic.
fn typeset_offset(self) -> Option<usize> {
let offset = usize::from(self.typeset_offset);
if offset < TYPE_SETS.len() {
Some(offset)
} else {
None
}
}
/// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin.
fn constraint_offset(self) -> usize {
self.constraint_offset as usize
}
/// Get the value type of result number `n`, having resolved the controlling type variable to
/// `ctrl_type`.
pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
debug_assert!(n < self.num_fixed_results(), "Invalid result index");
if let ResolvedConstraint::Bound(t) =
OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type)
{
t
} else {
panic!("Result constraints can't be free");
}
}
/// Get the value type of input value number `n`, having resolved the controlling type variable
/// to `ctrl_type`.
///
/// Unlike results, it is possible for some input values to vary freely within a specific
/// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant.
pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint {
debug_assert!(
n < self.num_fixed_value_arguments(),
"Invalid value argument index"
);
let offset = self.constraint_offset() + self.num_fixed_results();
OPERAND_CONSTRAINTS[offset + n].resolve(ctrl_type)
}
/// Get the typeset of allowed types for the controlling type variable in a polymorphic
/// instruction.
pub fn ctrl_typeset(self) -> Option<ValueTypeSet> {
self.typeset_offset().map(|offset| TYPE_SETS[offset])
}
/// Is this instruction polymorphic?
pub fn is_polymorphic(self) -> bool {
self.ctrl_typeset().is_some()
}
}
type BitSet8 = BitSet<u8>;
type BitSet16 = BitSet<u16>;
/// A value type set describes the permitted set of types for a type variable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ValueTypeSet {
/// Allowed lane sizes
pub lanes: BitSet16,
/// Allowed int widths
pub ints: BitSet8,
/// Allowed float widths
pub floats: BitSet8,
/// Allowed bool widths
pub bools: BitSet8,
}
impl ValueTypeSet {
/// Is `scalar` part of the base type set?
///
/// Note that the base type set does not have to be included in the type set proper.
fn is_base_type(self, scalar: Type) -> bool {
let l2b = scalar.log2_lane_bits();
if scalar.is_int() {
self.ints.contains(l2b)
} else if scalar.is_float() {
self.floats.contains(l2b)
} else if scalar.is_bool() {
self.bools.contains(l2b)
} else {
false
}
}
/// Does `typ` belong to this set?
pub fn contains(self, typ: Type) -> bool {
let l2l = typ.log2_lane_count();
self.lanes.contains(l2l) && self.is_base_type(typ.lane_type())
}
/// Get an example member of this type set.
///
/// This is used for error messages to avoid suggesting invalid types.
pub fn example(self) -> Type {
let t = if self.ints.max().unwrap_or(0) > 5 {
types::I32
} else if self.floats.max().unwrap_or(0) > 5 {
types::F32
} else if self.bools.max().unwrap_or(0) > 5 {
types::B32
} else {
types::B1
};
t.by(1 << self.lanes.min().unwrap()).unwrap()
}
}
/// Operand constraints. This describes the value type constraints on a single `Value` operand.
enum OperandConstraint {
/// This operand has a concrete value type.
Concrete(Type),
/// This operand can vary freely within the given type set.
/// The type set is identified by its index into the TYPE_SETS constant table.
Free(u8),
/// This operand is the same type as the controlling type variable.
Same,
/// This operand is `ctrlType.lane_type()`.
LaneOf,
/// This operand is `ctrlType.as_bool()`.
AsBool,
/// This operand is `ctrlType.half_width()`.
HalfWidth,
/// This operand is `ctrlType.double_width()`.
DoubleWidth,
/// This operand is `ctrlType.half_vector()`.
HalfVector,
/// This operand is `ctrlType.double_vector()`.
DoubleVector,
}
impl OperandConstraint {
/// Resolve this operand constraint into a concrete value type, given the value of the
/// controlling type variable.
pub fn resolve(&self, ctrl_type: Type) -> ResolvedConstraint {
use self::OperandConstraint::*;
use self::ResolvedConstraint::Bound;
match *self {
Concrete(t) => Bound(t),
Free(vts) => ResolvedConstraint::Free(TYPE_SETS[vts as usize]),
Same => Bound(ctrl_type),
LaneOf => Bound(ctrl_type.lane_type()),
AsBool => Bound(ctrl_type.as_bool()),
HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")),
DoubleWidth => Bound(
ctrl_type
.double_width()
.expect("invalid type for double_width"),
),
HalfVector => Bound(
ctrl_type
.half_vector()
.expect("invalid type for half_vector"),
),
DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")),
}
}
}
/// The type constraint on a value argument once the controlling type variable is known.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ResolvedConstraint {
/// The operand is bound to a known type.
Bound(Type),
/// The operand type can vary freely within the given set.
Free(ValueTypeSet),
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
#[test]
fn opcodes() {
use core::mem;
let x = Opcode::Iadd;
let mut y = Opcode::Isub;
assert!(x != y);
y = Opcode::Iadd;
assert_eq!(x, y);
assert_eq!(x.format(), InstructionFormat::Binary);
assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");
// Check the matcher.
assert_eq!("iadd".parse::<Opcode>(), Ok(Opcode::Iadd));
assert_eq!("iadd_imm".parse::<Opcode>(), Ok(Opcode::IaddImm));
assert_eq!("iadd\0".parse::<Opcode>(), Err("Unknown opcode"));
assert_eq!("".parse::<Opcode>(), Err("Unknown opcode"));
assert_eq!("\0".parse::<Opcode>(), Err("Unknown opcode"));
// Opcode is a single byte, and because Option<Opcode> originally came to 2 bytes, early on
// Opcode included a variant NotAnOpcode to avoid the unnecessary bloat. Since then the Rust
// compiler has brought in NonZero optimization, meaning that an enum not using the 0 value
// can be wrapped in `Option` at no size cost. We want to ensure Option<Opcode> remains small.
assert_eq!(mem::size_of::<Opcode>(), mem::size_of::<Option<Opcode>>());
}
#[test]
fn instruction_data() {
use core::mem;
// The size of the `InstructionData` enum is important for performance. It should not
// exceed 16 bytes. Use `Box<FooData>` out-of-line payloads for instruction formats that
// require more space than that. It would be fine with a data structure smaller than 16
// bytes, but what are the odds of that?
assert_eq!(mem::size_of::<InstructionData>(), 16);
}
#[test]
fn constraints() {
let a = Opcode::Iadd.constraints();
assert!(a.use_typevar_operand());
assert!(!a.requires_typevar_operand());
assert_eq!(a.num_fixed_results(), 1);
assert_eq!(a.num_fixed_value_arguments(), 2);
assert_eq!(a.result_type(0, types::I32), types::I32);
assert_eq!(a.result_type(0, types::I8), types::I8);
assert_eq!(
a.value_argument_constraint(0, types::I32),
ResolvedConstraint::Bound(types::I32)
);
assert_eq!(
a.value_argument_constraint(1, types::I32),
ResolvedConstraint::Bound(types::I32)
);
let b = Opcode::Bitcast.constraints();
assert!(!b.use_typevar_operand());
assert!(!b.requires_typevar_operand());
assert_eq!(b.num_fixed_results(), 1);
assert_eq!(b.num_fixed_value_arguments(), 1);
assert_eq!(b.result_type(0, types::I32), types::I32);
assert_eq!(b.result_type(0, types::I8), types::I8);
match b.value_argument_constraint(0, types::I32) {
ResolvedConstraint::Free(vts) => assert!(vts.contains(types::F32)),
_ => panic!("Unexpected constraint from value_argument_constraint"),
}
let c = Opcode::Call.constraints();
assert_eq!(c.num_fixed_results(), 0);
assert_eq!(c.num_fixed_value_arguments(), 0);
let i = Opcode::CallIndirect.constraints();
assert_eq!(i.num_fixed_results(), 0);
assert_eq!(i.num_fixed_value_arguments(), 1);
let cmp = Opcode::Icmp.constraints();
assert!(cmp.use_typevar_operand());
assert!(cmp.requires_typevar_operand());
assert_eq!(cmp.num_fixed_results(), 1);
assert_eq!(cmp.num_fixed_value_arguments(), 2);
}
#[test]
fn value_set() {
use crate::ir::types::*;
let vts = ValueTypeSet {
lanes: BitSet16::from_range(0, 8),
ints: BitSet8::from_range(4, 7),
floats: BitSet8::from_range(0, 0),
bools: BitSet8::from_range(3, 7),
};
assert!(!vts.contains(I8));
assert!(vts.contains(I32));
assert!(vts.contains(I64));
assert!(vts.contains(I32X4));
assert!(!vts.contains(F32));
assert!(!vts.contains(B1));
assert!(vts.contains(B8));
assert!(vts.contains(B64));
assert_eq!(vts.example().to_string(), "i32");
let vts = ValueTypeSet {
lanes: BitSet16::from_range(0, 8),
ints: BitSet8::from_range(0, 0),
floats: BitSet8::from_range(5, 7),
bools: BitSet8::from_range(3, 7),
};
assert_eq!(vts.example().to_string(), "f32");
let vts = ValueTypeSet {
lanes: BitSet16::from_range(1, 8),
ints: BitSet8::from_range(0, 0),
floats: BitSet8::from_range(5, 7),
bools: BitSet8::from_range(3, 7),
};
assert_eq!(vts.example().to_string(), "f32x2");
let vts = ValueTypeSet {
lanes: BitSet16::from_range(2, 8),
ints: BitSet8::from_range(0, 0),
floats: BitSet8::from_range(0, 0),
bools: BitSet8::from_range(3, 7),
};
assert!(!vts.contains(B32X2));
assert!(vts.contains(B32X4));
assert_eq!(vts.example().to_string(), "b32x4");
let vts = ValueTypeSet {
// TypeSet(lanes=(1, 256), ints=(8, 64))
lanes: BitSet16::from_range(0, 9),
ints: BitSet8::from_range(3, 7),
floats: BitSet8::from_range(0, 0),
bools: BitSet8::from_range(0, 0),
};
assert!(vts.contains(I32));
assert!(vts.contains(I32X4));
}
}

View File

@@ -0,0 +1,119 @@
//! Jump table representation.
//!
//! Jump tables are declared in the preamble and assigned an `ir::entities::JumpTable` reference.
//! The actual table of destinations is stored in a `JumpTableData` struct defined in this module.
use crate::ir::entities::Ebb;
use core::fmt::{self, Display, Formatter};
use core::slice::{Iter, IterMut};
use std::vec::Vec;
/// Contents of a jump table.
///
/// All jump tables use 0-based indexing and are densely populated.
#[derive(Clone)]
pub struct JumpTableData {
// Table entries.
table: Vec<Ebb>,
}
impl JumpTableData {
/// Create a new empty jump table.
pub fn new() -> Self {
Self { table: Vec::new() }
}
/// Create a new empty jump table with the specified capacity.
pub fn with_capacity(capacity: usize) -> Self {
Self {
table: Vec::with_capacity(capacity),
}
}
/// Get the number of table entries.
pub fn len(&self) -> usize {
self.table.len()
}
/// Append a table entry.
pub fn push_entry(&mut self, dest: Ebb) {
self.table.push(dest)
}
/// Checks if any of the entries branch to `ebb`.
pub fn branches_to(&self, ebb: Ebb) -> bool {
self.table.iter().any(|target_ebb| *target_ebb == ebb)
}
/// Access the whole table as a slice.
pub fn as_slice(&self) -> &[Ebb] {
self.table.as_slice()
}
/// Access the whole table as a mutable slice.
pub fn as_mut_slice(&mut self) -> &mut [Ebb] {
self.table.as_mut_slice()
}
/// Returns an iterator over the table.
pub fn iter(&self) -> Iter<Ebb> {
self.table.iter()
}
/// Returns an iterator that allows modifying each value.
pub fn iter_mut(&mut self) -> IterMut<Ebb> {
self.table.iter_mut()
}
}
impl Display for JumpTableData {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
write!(fmt, "jump_table [")?;
match self.table.first() {
None => (),
Some(first) => write!(fmt, "{}", first)?,
}
for ebb in self.table.iter().skip(1) {
write!(fmt, ", {}", ebb)?;
}
write!(fmt, "]")
}
}
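// A short sketch of `branches_to`, which the tests below don't otherwise cover:
// it reports whether any table entry targets the given EBB.
#[cfg(test)]
mod branches_to_example {
    use super::JumpTableData;
    use crate::entity::EntityRef;
    use crate::ir::Ebb;

    #[test]
    fn detects_targets() {
        let mut jt = JumpTableData::new();
        jt.push_entry(Ebb::new(3));
        assert!(jt.branches_to(Ebb::new(3)));
        assert!(!jt.branches_to(Ebb::new(4)));
    }
}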
#[cfg(test)]
mod tests {
use super::JumpTableData;
use crate::entity::EntityRef;
use crate::ir::Ebb;
use std::string::ToString;
#[test]
fn empty() {
let jt = JumpTableData::new();
assert_eq!(jt.as_slice().get(0), None);
assert_eq!(jt.as_slice().get(10), None);
assert_eq!(jt.to_string(), "jump_table []");
let v = jt.as_slice();
assert_eq!(v, []);
}
#[test]
fn insert() {
let e1 = Ebb::new(1);
let e2 = Ebb::new(2);
let mut jt = JumpTableData::new();
jt.push_entry(e1);
jt.push_entry(e2);
jt.push_entry(e1);
assert_eq!(jt.to_string(), "jump_table [ebb1, ebb2, ebb1]");
let v = jt.as_slice();
assert_eq!(v, [e1, e2, e1]);
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,205 @@
//! Naming well-known routines in the runtime library.
use crate::ir::{
types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Inst, Opcode,
Signature, Type,
};
use crate::isa::{CallConv, RegUnit, TargetIsa};
use core::fmt;
use core::str::FromStr;
/// The name of a runtime library routine.
///
/// Runtime library calls are generated for Cranelift IR instructions that don't have an equivalent
/// ISA instruction or an easy macro expansion. A `LibCall` is used as a well-known name to refer to
/// the runtime library routine. This way, Cranelift doesn't have to know about the naming
/// convention in the embedding VM's runtime library.
///
/// This list is likely to grow over time.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LibCall {
/// Probe for stack overflow. These are emitted for functions with large stack
/// frames when the `probestack_enabled` setting is true.
Probestack,
/// ceil.f32
CeilF32,
/// ceil.f64
CeilF64,
/// floor.f32
FloorF32,
/// floor.f64
FloorF64,
/// trunc.f32
TruncF32,
/// trunc.f64
TruncF64,
/// nearest.f32
NearestF32,
/// nearest.f64
NearestF64,
/// libc.memcpy
Memcpy,
/// libc.memset
Memset,
/// libc.memmove
Memmove,
}
impl fmt::Display for LibCall {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self, f)
}
}
impl FromStr for LibCall {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"Probestack" => Ok(LibCall::Probestack),
"CeilF32" => Ok(LibCall::CeilF32),
"CeilF64" => Ok(LibCall::CeilF64),
"FloorF32" => Ok(LibCall::FloorF32),
"FloorF64" => Ok(LibCall::FloorF64),
"TruncF32" => Ok(LibCall::TruncF32),
"TruncF64" => Ok(LibCall::TruncF64),
"NearestF32" => Ok(LibCall::NearestF32),
"NearestF64" => Ok(LibCall::NearestF64),
"Memcpy" => Ok(LibCall::Memcpy),
"Memset" => Ok(LibCall::Memset),
"Memmove" => Ok(LibCall::Memmove),
_ => Err(()),
}
}
}
impl LibCall {
/// Get the well-known library call name to use as a replacement for an instruction with the
/// given opcode and controlling type variable.
///
/// Returns `None` if no well-known library routine name exists for that instruction.
pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<Self> {
Some(match ctrl_type {
types::F32 => match opcode {
Opcode::Ceil => LibCall::CeilF32,
Opcode::Floor => LibCall::FloorF32,
Opcode::Trunc => LibCall::TruncF32,
Opcode::Nearest => LibCall::NearestF32,
_ => return None,
},
types::F64 => match opcode {
Opcode::Ceil => LibCall::CeilF64,
Opcode::Floor => LibCall::FloorF64,
Opcode::Trunc => LibCall::TruncF64,
Opcode::Nearest => LibCall::NearestF64,
_ => return None,
},
_ => return None,
})
}
}
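// A small sketch of the opcode-to-libcall mapping above: float rounding opcodes
// resolve to a routine for their controlling type, everything else to `None`.
#[cfg(test)]
mod for_inst_example {
    use super::*;

    #[test]
    fn float_rounding() {
        assert_eq!(
            LibCall::for_inst(Opcode::Ceil, types::F32),
            Some(LibCall::CeilF32)
        );
        // Integer control types have no library routine.
        assert_eq!(LibCall::for_inst(Opcode::Ceil, types::I32), None);
    }
}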
/// Get a function reference for `libcall` in `func`, following the signature
/// for `inst`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_libcall_funcref(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(libcall, func).unwrap_or_else(|| make_funcref_for_inst(libcall, func, inst, isa))
}
/// Get a function reference for the probestack function in `func`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_probestack_funcref(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(LibCall::Probestack, func)
.unwrap_or_else(|| make_funcref_for_probestack(func, reg_type, arg_reg, isa))
}
/// Get the existing function reference for `libcall` in `func` if it exists.
fn find_funcref(libcall: LibCall, func: &Function) -> Option<FuncRef> {
// We're assuming that all libcall function decls are at the end.
// If we get this wrong, worst case we'll have duplicate libcall decls, which is harmless.
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
match func_data.name {
ExternalName::LibCall(lc) => {
if lc == libcall {
return Some(fref);
}
}
_ => break,
}
}
None
}
/// Create a funcref for `LibCall::Probestack`.
fn make_funcref_for_probestack(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
let mut sig = Signature::new(CallConv::Probestack);
let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg);
sig.params.push(rax);
if !isa.flags().probestack_func_adjusts_sp() {
sig.returns.push(rax);
}
make_funcref(LibCall::Probestack, func, sig, isa)
}
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref_for_inst(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
let mut sig = Signature::new(isa.default_call_conv());
for &v in func.dfg.inst_args(inst) {
sig.params.push(AbiParam::new(func.dfg.value_type(v)));
}
for &v in func.dfg.inst_results(inst) {
sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
}
make_funcref(libcall, func, sig, isa)
}
/// Create a funcref for `libcall`.
fn make_funcref(libcall: LibCall, func: &mut Function, sig: Signature, isa: &TargetIsa) -> FuncRef {
let sigref = func.import_signature(sig);
func.import_function(ExtFuncData {
name: ExternalName::LibCall(libcall),
signature: sigref,
colocated: isa.flags().colocated_libcalls(),
})
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
#[test]
fn display() {
assert_eq!(LibCall::CeilF32.to_string(), "CeilF32");
assert_eq!(LibCall::NearestF64.to_string(), "NearestF64");
}
#[test]
fn parsing() {
assert_eq!("FloorF32".parse(), Ok(LibCall::FloorF32));
}
}

View File

@@ -0,0 +1,117 @@
//! Memory operation flags.
use core::fmt;
enum FlagBit {
Notrap,
Aligned,
Readonly,
}
const NAMES: [&str; 3] = ["notrap", "aligned", "readonly"];
/// Flags for memory operations like load/store.
///
/// Each of these flags introduces a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, the semantics of a
/// program do not change when a flag is removed, but adding a flag can.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct MemFlags {
bits: u8,
}
impl MemFlags {
/// Create a new empty set of flags.
pub fn new() -> Self {
Self { bits: 0 }
}
/// Create a set of flags representing an access from a "trusted" address, meaning it's
/// known to be aligned and non-trapping.
pub fn trusted() -> Self {
let mut result = Self::new();
result.set_notrap();
result.set_aligned();
result
}
/// Read a flag bit.
fn read(self, bit: FlagBit) -> bool {
self.bits & (1 << bit as usize) != 0
}
/// Set a flag bit.
fn set(&mut self, bit: FlagBit) {
self.bits |= 1 << bit as usize
}
/// Set a flag bit by name.
///
/// Returns true if the flag was found and set, false for an unknown flag name.
pub fn set_by_name(&mut self, name: &str) -> bool {
match NAMES.iter().position(|&s| s == name) {
Some(bit) => {
self.bits |= 1 << bit;
true
}
None => false,
}
}
/// Test if the `notrap` flag is set.
///
/// Normally, trapping is part of the semantics of a load/store operation. If the platform
/// would cause a trap when accessing the effective address, the Cranelift memory operation is
/// also required to trap.
///
/// The `notrap` flag tells Cranelift that the memory is *accessible*, which means that
/// accesses will not trap. This makes it possible to delete an unused load or a dead store
/// instruction.
pub fn notrap(self) -> bool {
self.read(FlagBit::Notrap)
}
/// Set the `notrap` flag.
pub fn set_notrap(&mut self) {
self.set(FlagBit::Notrap)
}
/// Test if the `aligned` flag is set.
///
/// By default, Cranelift memory instructions work with any unaligned effective address. If the
/// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the
/// effective address is misaligned.
pub fn aligned(self) -> bool {
self.read(FlagBit::Aligned)
}
/// Set the `aligned` flag.
pub fn set_aligned(&mut self) {
self.set(FlagBit::Aligned)
}
/// Test if the `readonly` flag is set.
///
/// Loads with this flag have no memory dependencies.
/// This results in undefined behavior if the dereferenced memory is mutated at any time
/// between when the function is called and when it is exited.
pub fn readonly(self) -> bool {
self.read(FlagBit::Readonly)
}
/// Set the `readonly` flag.
pub fn set_readonly(&mut self) {
self.set(FlagBit::Readonly)
}
}
impl fmt::Display for MemFlags {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for (i, n) in NAMES.iter().enumerate() {
if self.bits & (1 << i) != 0 {
write!(f, " {}", n)?;
}
}
Ok(())
}
}
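// A usage sketch for `MemFlags`: the names accepted by `set_by_name` are exactly
// the ones printed by the `Display` impl above, each with a leading space.
#[cfg(test)]
mod memflags_example {
    use super::*;
    use std::string::ToString;

    #[test]
    fn trusted_and_by_name() {
        let flags = MemFlags::trusted();
        assert!(flags.notrap());
        assert!(flags.aligned());
        assert!(!flags.readonly());
        assert_eq!(flags.to_string(), " notrap aligned");

        let mut flags = MemFlags::new();
        assert!(flags.set_by_name("readonly"));
        assert!(!flags.set_by_name("bogus"));
        assert!(flags.readonly());
    }
}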

View File

@@ -0,0 +1,73 @@
//! Representation of Cranelift IR functions.
mod builder;
pub mod condcodes;
pub mod dfg;
pub mod entities;
mod extfunc;
mod extname;
pub mod function;
mod globalvalue;
mod heap;
pub mod immediates;
pub mod instructions;
pub mod jumptable;
pub mod layout;
mod libcall;
mod memflags;
mod progpoint;
mod sourceloc;
pub mod stackslot;
mod table;
mod trapcode;
pub mod types;
mod valueloc;
pub use crate::ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase};
pub use crate::ir::dfg::{DataFlowGraph, ValueDef};
pub use crate::ir::entities::{
Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value,
};
pub use crate::ir::extfunc::{
AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
};
pub use crate::ir::extname::ExternalName;
pub use crate::ir::function::Function;
pub use crate::ir::globalvalue::GlobalValueData;
pub use crate::ir::heap::{HeapData, HeapStyle};
pub use crate::ir::instructions::{
InstructionData, Opcode, ValueList, ValueListPool, VariableArgs,
};
pub use crate::ir::jumptable::JumpTableData;
pub use crate::ir::layout::Layout;
pub use crate::ir::libcall::{get_libcall_funcref, get_probestack_funcref, LibCall};
pub use crate::ir::memflags::MemFlags;
pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
pub use crate::ir::sourceloc::SourceLoc;
pub use crate::ir::stackslot::{StackSlotData, StackSlotKind, StackSlots};
pub use crate::ir::table::TableData;
pub use crate::ir::trapcode::TrapCode;
pub use crate::ir::types::Type;
pub use crate::ir::valueloc::{ArgumentLoc, ValueLoc};
use crate::binemit;
use crate::entity::{PrimaryMap, SecondaryMap};
use crate::isa;
/// Map of value locations.
pub type ValueLocations = SecondaryMap<Value, ValueLoc>;
/// Map of jump tables.
pub type JumpTables = PrimaryMap<JumpTable, JumpTableData>;
/// Map of instruction encodings.
pub type InstEncodings = SecondaryMap<Inst, isa::Encoding>;
/// Code offsets for EBBs.
pub type EbbOffsets = SecondaryMap<Ebb, binemit::CodeOffset>;
/// Code offsets for Jump Tables.
pub type JumpTableOffsets = SecondaryMap<JumpTable, binemit::CodeOffset>;
/// Source locations for instructions.
pub type SourceLocs = SecondaryMap<Inst, SourceLoc>;

View File

@@ -0,0 +1,164 @@
//! Program points.
use crate::entity::EntityRef;
use crate::ir::{Ebb, Inst, ValueDef};
use core::cmp;
use core::fmt;
use core::u32;
/// A `ProgramPoint` represents a position in a function where the live range of an SSA value can
/// begin or end. It can be either:
///
/// 1. An instruction or
/// 2. An EBB header.
///
/// This corresponds more or less to the lines in the textual form of Cranelift IR.
#[derive(PartialEq, Eq, Clone, Copy)]
pub struct ProgramPoint(u32);
impl From<Inst> for ProgramPoint {
fn from(inst: Inst) -> Self {
let idx = inst.index();
debug_assert!(idx < (u32::MAX / 2) as usize);
ProgramPoint((idx * 2) as u32)
}
}
impl From<Ebb> for ProgramPoint {
fn from(ebb: Ebb) -> Self {
let idx = ebb.index();
debug_assert!(idx < (u32::MAX / 2) as usize);
ProgramPoint((idx * 2 + 1) as u32)
}
}
impl From<ValueDef> for ProgramPoint {
fn from(def: ValueDef) -> Self {
match def {
ValueDef::Result(inst, _) => inst.into(),
ValueDef::Param(ebb, _) => ebb.into(),
}
}
}
/// An expanded program point directly exposes the variants, but takes twice the space to
/// represent.
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum ExpandedProgramPoint {
/// An instruction in the function.
Inst(Inst),
/// An EBB header.
Ebb(Ebb),
}
impl ExpandedProgramPoint {
/// Get the instruction we know is inside.
pub fn unwrap_inst(self) -> Inst {
match self {
ExpandedProgramPoint::Inst(x) => x,
ExpandedProgramPoint::Ebb(x) => panic!("expected inst: {}", x),
}
}
}
impl From<Inst> for ExpandedProgramPoint {
fn from(inst: Inst) -> Self {
ExpandedProgramPoint::Inst(inst)
}
}
impl From<Ebb> for ExpandedProgramPoint {
fn from(ebb: Ebb) -> Self {
ExpandedProgramPoint::Ebb(ebb)
}
}
impl From<ValueDef> for ExpandedProgramPoint {
fn from(def: ValueDef) -> Self {
match def {
ValueDef::Result(inst, _) => inst.into(),
ValueDef::Param(ebb, _) => ebb.into(),
}
}
}
impl From<ProgramPoint> for ExpandedProgramPoint {
fn from(pp: ProgramPoint) -> Self {
if pp.0 & 1 == 0 {
ExpandedProgramPoint::Inst(Inst::from_u32(pp.0 / 2))
} else {
ExpandedProgramPoint::Ebb(Ebb::from_u32(pp.0 / 2))
}
}
}
impl fmt::Display for ExpandedProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ExpandedProgramPoint::Inst(x) => write!(f, "{}", x),
ExpandedProgramPoint::Ebb(x) => write!(f, "{}", x),
}
}
}
impl fmt::Display for ProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let epp: ExpandedProgramPoint = (*self).into();
epp.fmt(f)
}
}
impl fmt::Debug for ExpandedProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ExpandedProgramPoint({})", self)
}
}
impl fmt::Debug for ProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ProgramPoint({})", self)
}
}
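// A sketch of the packed encoding used by `ProgramPoint`: instructions map to even
// numbers and EBB headers to odd ones, so expanding a point recovers the original
// entity exactly.
#[cfg(test)]
mod roundtrip_example {
    use super::*;
    use crate::entity::EntityRef;

    #[test]
    fn expand() {
        let inst = Inst::new(5);
        let ebb = Ebb::new(3);
        let pp1: ProgramPoint = inst.into();
        let pp2: ProgramPoint = ebb.into();
        assert_eq!(ExpandedProgramPoint::from(pp1), ExpandedProgramPoint::Inst(inst));
        assert_eq!(ExpandedProgramPoint::from(pp2), ExpandedProgramPoint::Ebb(ebb));
    }
}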
/// Context for ordering program points.
///
/// `ProgramPoint` objects don't carry enough information to be ordered independently; they need a
/// context providing the program order.
pub trait ProgramOrder {
/// Compare the program points `a` and `b` relative to this program order.
///
/// Return `Less` if `a` appears in the program before `b`.
///
/// This is declared as a generic such that it can be called with `Inst` and `Ebb` arguments
/// directly. Depending on the implementation, there is a good chance performance will be
/// improved for those cases where the type of either argument is known statically.
fn cmp<A, B>(&self, a: A, b: B) -> cmp::Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>;
/// Is the range from `inst` to `ebb` just the gap between consecutive EBBs?
///
/// This returns true if `inst` is the terminator in the EBB immediately before `ebb`.
fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool;
}
#[cfg(test)]
mod tests {
use super::*;
use crate::entity::EntityRef;
use crate::ir::{Ebb, Inst};
use std::string::ToString;
#[test]
fn convert() {
let i5 = Inst::new(5);
let b3 = Ebb::new(3);
let pp1: ProgramPoint = i5.into();
let pp2: ProgramPoint = b3.into();
assert_eq!(pp1.to_string(), "inst5");
assert_eq!(pp2.to_string(), "ebb3");
}
}

View File

@@ -0,0 +1,63 @@
//! Source locations.
//!
//! Cranelift tracks the original source location of each instruction, and preserves the source
//! location when instructions are transformed.
use core::fmt;
/// A source location.
///
/// This is an opaque 32-bit number attached to each Cranelift IR instruction. Cranelift does not
/// interpret source locations in any way, they are simply preserved from the input to the output.
///
/// The default source location uses the all-ones bit pattern `!0`. It is used for instructions
/// that can't be given a real source location.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SourceLoc(u32);
impl SourceLoc {
/// Create a new source location with the given bits.
pub fn new(bits: u32) -> Self {
SourceLoc(bits)
}
/// Is this the default source location?
pub fn is_default(self) -> bool {
self == Default::default()
}
/// Read the bits of this source location.
pub fn bits(self) -> u32 {
self.0
}
}
impl Default for SourceLoc {
fn default() -> Self {
SourceLoc(!0)
}
}
impl fmt::Display for SourceLoc {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.is_default() {
write!(f, "@-")
} else {
write!(f, "@{:04x}", self.0)
}
}
}
#[cfg(test)]
mod tests {
use crate::ir::SourceLoc;
use std::string::ToString;
#[test]
fn display() {
assert_eq!(SourceLoc::default().to_string(), "@-");
assert_eq!(SourceLoc::new(0).to_string(), "@0000");
assert_eq!(SourceLoc::new(16).to_string(), "@0010");
assert_eq!(SourceLoc::new(0xabcdef).to_string(), "@abcdef");
}
}

View File

@@ -0,0 +1,427 @@
//! Stack slots.
//!
//! The `StackSlotData` struct keeps track of a single stack slot in a function.
//!
use crate::entity::{Iter, IterMut, Keys, PrimaryMap};
use crate::ir::{StackSlot, Type};
use crate::packed_option::PackedOption;
use core::cmp;
use core::fmt;
use core::ops::{Index, IndexMut};
use core::slice;
use core::str::FromStr;
use std::vec::Vec;
/// The size of an object on the stack, or the size of a stack frame.
///
/// We don't use `usize` to represent object sizes on the target platform because Cranelift supports
/// cross-compilation, and `usize` is a type that depends on the host platform, not the target
/// platform.
pub type StackSize = u32;
/// A stack offset.
///
/// The location of a stack offset relative to a stack pointer or frame pointer.
pub type StackOffset = i32;
/// The minimum size of a spill slot in bytes.
///
/// ISA implementations are allowed to assume that small types like `b1` and `i8` get a full 4-byte
/// spill slot.
const MIN_SPILL_SLOT_SIZE: StackSize = 4;
/// Get the spill slot size to use for `ty`.
fn spill_size(ty: Type) -> StackSize {
cmp::max(MIN_SPILL_SLOT_SIZE, ty.bytes())
}
/// The kind of a stack slot.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackSlotKind {
/// A spill slot. This is a stack slot created by the register allocator.
SpillSlot,
/// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
/// and `stack_store` instructions.
ExplicitSlot,
/// An incoming function argument.
///
/// If the current function has more arguments than fits in registers, the remaining arguments
/// are passed on the stack by the caller. These incoming arguments are represented as SSA
/// values assigned to incoming stack slots.
IncomingArg,
/// An outgoing function argument.
///
/// When preparing to call a function whose arguments don't fit in registers, outgoing argument
/// stack slots are used to represent individual arguments in the outgoing call frame. These
/// stack slots are only valid while setting up a call.
OutgoingArg,
/// An emergency spill slot.
///
/// Emergency slots are allocated late when the register's constraint solver needs extra space
/// to shuffle registers around. They are only used briefly, and can be reused.
EmergencySlot,
}
impl FromStr for StackSlotKind {
type Err = ();
fn from_str(s: &str) -> Result<Self, ()> {
use self::StackSlotKind::*;
match s {
"explicit_slot" => Ok(ExplicitSlot),
"spill_slot" => Ok(SpillSlot),
"incoming_arg" => Ok(IncomingArg),
"outgoing_arg" => Ok(OutgoingArg),
"emergency_slot" => Ok(EmergencySlot),
_ => Err(()),
}
}
}
impl fmt::Display for StackSlotKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::StackSlotKind::*;
f.write_str(match *self {
ExplicitSlot => "explicit_slot",
SpillSlot => "spill_slot",
IncomingArg => "incoming_arg",
OutgoingArg => "outgoing_arg",
EmergencySlot => "emergency_slot",
})
}
}
/// Contents of a stack slot.
#[derive(Clone, Debug)]
pub struct StackSlotData {
/// The kind of stack slot.
pub kind: StackSlotKind,
/// Size of stack slot in bytes.
pub size: StackSize,
/// Offset of stack slot relative to the stack pointer in the caller.
///
/// On x86, the base address is the stack pointer *before* the return address was pushed. On
/// RISC ISAs, the base address is the value of the stack pointer on entry to the function.
///
/// For `OutgoingArg` stack slots, the offset is relative to the current function's stack
/// pointer immediately before the call.
pub offset: Option<StackOffset>,
}
impl StackSlotData {
/// Create a stack slot with the specified byte size.
pub fn new(kind: StackSlotKind, size: StackSize) -> Self {
Self {
kind,
size,
offset: None,
}
}
/// Get the alignment in bytes of this stack slot given the stack pointer alignment.
pub fn alignment(&self, max_align: StackSize) -> StackSize {
debug_assert!(max_align.is_power_of_two());
// We want to find the largest power of two that divides both `self.size` and `max_align`.
// That is the same as isolating the rightmost bit in `x`.
let x = self.size | max_align;
// Cf. Hacker's Delight.
x & x.wrapping_neg()
}
}
impl fmt::Display for StackSlotData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.kind, self.size)?;
if let Some(offset) = self.offset {
write!(f, ", offset {}", offset)?;
}
Ok(())
}
}
/// Stack frame manager.
///
/// Keep track of all the stack slots used by a function.
#[derive(Clone, Debug)]
pub struct StackSlots {
/// All allocated stack slots.
slots: PrimaryMap<StackSlot, StackSlotData>,
/// All the outgoing stack slots, ordered by offset.
outgoing: Vec<StackSlot>,
/// All the emergency slots.
emergency: Vec<StackSlot>,
/// The total size of the stack frame.
///
/// This is the distance from the stack pointer in the current function to the stack pointer in
/// the calling function, so it includes a pushed return address as well as space for outgoing
/// call arguments.
///
/// This is computed by the `layout()` method.
pub frame_size: Option<StackSize>,
}
/// Stack slot manager functions that behave mostly like an entity map.
impl StackSlots {
/// Create an empty stack slot manager.
pub fn new() -> Self {
Self {
slots: PrimaryMap::new(),
outgoing: Vec::new(),
emergency: Vec::new(),
frame_size: None,
}
}
/// Clear out everything.
pub fn clear(&mut self) {
self.slots.clear();
self.outgoing.clear();
self.emergency.clear();
self.frame_size = None;
}
/// Allocate a new stack slot.
///
/// This function should be primarily used by the text format parser. There are more convenient
/// functions for creating specific kinds of stack slots below.
pub fn push(&mut self, data: StackSlotData) -> StackSlot {
self.slots.push(data)
}
/// Check if `ss` is a valid stack slot reference.
pub fn is_valid(&self, ss: StackSlot) -> bool {
self.slots.is_valid(ss)
}
/// Set the offset of a stack slot.
pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) {
self.slots[ss].offset = Some(offset);
}
/// Get an iterator over all the stack slot keys.
pub fn iter(&self) -> Iter<StackSlot, StackSlotData> {
self.slots.iter()
}
/// Get an iterator over all the stack slot keys, mutable edition.
pub fn iter_mut(&mut self) -> IterMut<StackSlot, StackSlotData> {
self.slots.iter_mut()
}
/// Get an iterator over all the stack slot records.
pub fn values(&self) -> slice::Iter<StackSlotData> {
self.slots.values()
}
/// Get an iterator over all the stack slot records, mutable edition.
pub fn values_mut(&mut self) -> slice::IterMut<StackSlotData> {
self.slots.values_mut()
}
/// Get an iterator over all the stack slot keys.
pub fn keys(&self) -> Keys<StackSlot> {
self.slots.keys()
}
/// Get a reference to the next stack slot that would be created by `push()`.
///
/// This should just be used by the parser.
pub fn next_key(&self) -> StackSlot {
self.slots.next_key()
}
}
impl Index<StackSlot> for StackSlots {
type Output = StackSlotData;
fn index(&self, ss: StackSlot) -> &StackSlotData {
&self.slots[ss]
}
}
impl IndexMut<StackSlot> for StackSlots {
fn index_mut(&mut self, ss: StackSlot) -> &mut StackSlotData {
&mut self.slots[ss]
}
}
/// Higher-level stack frame manipulation functions.
impl StackSlots {
/// Create a new spill slot for spilling values of type `ty`.
pub fn make_spill_slot(&mut self, ty: Type) -> StackSlot {
self.push(StackSlotData::new(StackSlotKind::SpillSlot, spill_size(ty)))
}
/// Create a stack slot representing an incoming function argument.
pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes());
debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
data.offset = Some(offset);
self.push(data)
}
/// Get a stack slot representing an outgoing argument.
///
/// This may create a new stack slot, or reuse an existing outgoing stack slot with the
/// requested offset and size.
///
/// The requested offset is relative to this function's stack pointer immediately before making
/// the call.
pub fn get_outgoing_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
let size = ty.bytes();
// Look for an existing outgoing stack slot with the same offset and size.
let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| {
(self[ss].offset.unwrap(), self[ss].size)
}) {
Ok(idx) => return self.outgoing[idx],
Err(idx) => idx,
};
// No existing slot found. Make one and insert it into `outgoing`.
let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size);
debug_assert!(offset <= StackOffset::max_value() - size as StackOffset);
data.offset = Some(offset);
let ss = self.slots.push(data);
self.outgoing.insert(inspos, ss);
ss
}
/// Get an emergency spill slot that can be used to store a `ty` value.
///
/// This may allocate a new slot, or it may reuse an existing emergency spill slot, excluding
/// any slots in the `in_use` list.
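///
/// A small sketch of the reuse behavior (assuming the usual `cranelift_codegen::ir` re-exports):
///
/// ```
/// use cranelift_codegen::ir::{types, StackSlots};
///
/// let mut sss = StackSlots::new();
/// let ss0 = sss.get_emergency_slot(types::I32, &[]);
/// // Smaller and larger requests reuse (and grow) the same slot.
/// assert_eq!(sss.get_emergency_slot(types::I8, &[]), ss0);
/// assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
/// // Excluding the slot via `in_use` forces a fresh one.
/// let ss1 = sss.get_emergency_slot(types::I32, &[ss0.into()]);
/// assert_ne!(ss1, ss0);
/// ```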
pub fn get_emergency_slot(
&mut self,
ty: Type,
in_use: &[PackedOption<StackSlot>],
) -> StackSlot {
let size = spill_size(ty);
// Find the smallest existing slot that can fit the type.
if let Some(&ss) = self
.emergency
.iter()
.filter(|&&ss| self[ss].size >= size && !in_use.contains(&ss.into()))
.min_by_key(|&&ss| self[ss].size)
{
return ss;
}
// Alternatively, use the largest available slot and make it larger.
if let Some(&ss) = self
.emergency
.iter()
.filter(|&&ss| !in_use.contains(&ss.into()))
.max_by_key(|&&ss| self[ss].size)
{
self.slots[ss].size = size;
return ss;
}
// No existing slot found. Make one and insert it into `emergency`.
let data = StackSlotData::new(StackSlotKind::EmergencySlot, size);
let ss = self.slots.push(data);
self.emergency.push(ss);
ss
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::types;
use crate::ir::Function;
use std::string::ToString;
#[test]
fn stack_slot() {
let mut func = Function::new();
let ss0 = func.create_stack_slot(StackSlotData::new(StackSlotKind::IncomingArg, 4));
let ss1 = func.create_stack_slot(StackSlotData::new(StackSlotKind::SpillSlot, 8));
assert_eq!(ss0.to_string(), "ss0");
assert_eq!(ss1.to_string(), "ss1");
assert_eq!(func.stack_slots[ss0].size, 4);
assert_eq!(func.stack_slots[ss1].size, 8);
assert_eq!(func.stack_slots[ss0].to_string(), "incoming_arg 4");
assert_eq!(func.stack_slots[ss1].to_string(), "spill_slot 8");
}
#[test]
fn outgoing() {
let mut sss = StackSlots::new();
let ss0 = sss.get_outgoing_arg(types::I32, 8);
let ss1 = sss.get_outgoing_arg(types::I32, 4);
let ss2 = sss.get_outgoing_arg(types::I64, 8);
assert_eq!(sss[ss0].offset, Some(8));
assert_eq!(sss[ss0].size, 4);
assert_eq!(sss[ss1].offset, Some(4));
assert_eq!(sss[ss1].size, 4);
assert_eq!(sss[ss2].offset, Some(8));
assert_eq!(sss[ss2].size, 8);
assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0);
assert_eq!(sss.get_outgoing_arg(types::I32, 4), ss1);
assert_eq!(sss.get_outgoing_arg(types::I64, 8), ss2);
}
#[test]
fn alignment() {
let slot = StackSlotData::new(StackSlotKind::SpillSlot, 8);
assert_eq!(slot.alignment(4), 4);
assert_eq!(slot.alignment(8), 8);
assert_eq!(slot.alignment(16), 8);
let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);
assert_eq!(slot2.alignment(4), 4);
assert_eq!(slot2.alignment(8), 8);
assert_eq!(slot2.alignment(16), 8);
assert_eq!(slot2.alignment(32), 8);
}
#[test]
fn emergency() {
let mut sss = StackSlots::new();
let ss0 = sss.get_emergency_slot(types::I32, &[]);
assert_eq!(sss[ss0].size, 4);
// When a smaller size is requested, we should simply get the same slot back.
assert_eq!(sss.get_emergency_slot(types::I8, &[]), ss0);
assert_eq!(sss[ss0].size, 4);
assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss0);
assert_eq!(sss[ss0].size, 4);
// Ask for a larger size and the slot should grow.
assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
assert_eq!(sss[ss0].size, 8);
// When one slot is in use, we should get a new one.
let ss1 = sss.get_emergency_slot(types::I32, &[None.into(), ss0.into()]);
assert_eq!(sss[ss0].size, 8);
assert_eq!(sss[ss1].size, 4);
// Now we should get the smallest fit of the two available slots.
assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss1);
assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
}
}

View File

@@ -0,0 +1,36 @@
//! Tables.
use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
/// Information about a table declaration.
#[derive(Clone)]
pub struct TableData {
/// Global value giving the address of the start of the table.
pub base_gv: GlobalValue,
/// Guaranteed minimum table size in elements. Table accesses before `min_size` don't need
/// bounds checking.
pub min_size: Uimm64,
/// Global value giving the current bound of the table, in elements.
pub bound_gv: GlobalValue,
/// The size of a table element, in bytes.
pub element_size: Uimm64,
/// The index type for the table.
pub index_type: Type,
}
impl fmt::Display for TableData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("dynamic")?;
write!(
f,
" {}, min {}, bound {}, element_size {}, index_type {}",
self.base_gv, self.min_size, self.bound_gv, self.element_size, self.index_type
)
}
}

View File

@@ -0,0 +1,134 @@
//! Trap codes describing the reason for a trap.
use core::fmt::{self, Display, Formatter};
use core::str::FromStr;
/// A trap code describing the reason for a trap.
///
/// All trap instructions have an explicit trap code.
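///
/// Codes round-trip through their textual mnemonics, for example:
///
/// ```
/// use cranelift_codegen::ir::TrapCode;
///
/// assert_eq!(TrapCode::StackOverflow.to_string(), "stk_ovf");
/// assert_eq!("stk_ovf".parse(), Ok(TrapCode::StackOverflow));
/// ```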
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum TrapCode {
/// The current stack space was exhausted.
///
/// On some platforms, a stack overflow may also be indicated by a segmentation fault from the
/// stack guard page.
StackOverflow,
/// A `heap_addr` instruction detected an out-of-bounds error.
///
/// Note that not all out-of-bounds heap accesses are reported this way;
/// some are detected by a segmentation fault on the heap's unmapped or
/// offset-guard pages.
HeapOutOfBounds,
/// A `table_addr` instruction detected an out-of-bounds error.
TableOutOfBounds,
/// Other bounds checking error.
OutOfBounds,
/// Indirect call to a null table entry.
IndirectCallToNull,
/// Signature mismatch on indirect call.
BadSignature,
/// An integer arithmetic operation caused an overflow.
IntegerOverflow,
/// An integer division by zero.
IntegerDivisionByZero,
/// Failed float-to-int conversion.
BadConversionToInteger,
/// Code that was supposed to have been unreachable was reached.
UnreachableCodeReached,
/// Execution has potentially run too long and may be interrupted.
/// This trap is resumable.
Interrupt,
/// A user-defined trap code.
User(u16),
}
impl Display for TrapCode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::TrapCode::*;
let identifier = match *self {
StackOverflow => "stk_ovf",
HeapOutOfBounds => "heap_oob",
TableOutOfBounds => "table_oob",
OutOfBounds => "oob",
IndirectCallToNull => "icall_null",
BadSignature => "bad_sig",
IntegerOverflow => "int_ovf",
IntegerDivisionByZero => "int_divz",
BadConversionToInteger => "bad_toint",
UnreachableCodeReached => "unreachable",
Interrupt => "interrupt",
User(x) => return write!(f, "user{}", x),
};
f.write_str(identifier)
}
}
impl FromStr for TrapCode {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
use self::TrapCode::*;
match s {
"stk_ovf" => Ok(StackOverflow),
"heap_oob" => Ok(HeapOutOfBounds),
"table_oob" => Ok(TableOutOfBounds),
"oob" => Ok(OutOfBounds),
"icall_null" => Ok(IndirectCallToNull),
"bad_sig" => Ok(BadSignature),
"int_ovf" => Ok(IntegerOverflow),
"int_divz" => Ok(IntegerDivisionByZero),
"bad_toint" => Ok(BadConversionToInteger),
"unreachable" => Ok(UnreachableCodeReached),
"interrupt" => Ok(Interrupt),
_ if s.starts_with("user") => s[4..].parse().map(User).map_err(|_| ()),
_ => Err(()),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
// Everything but user-defined codes.
const CODES: [TrapCode; 11] = [
TrapCode::StackOverflow,
TrapCode::HeapOutOfBounds,
TrapCode::TableOutOfBounds,
TrapCode::OutOfBounds,
TrapCode::IndirectCallToNull,
TrapCode::BadSignature,
TrapCode::IntegerOverflow,
TrapCode::IntegerDivisionByZero,
TrapCode::BadConversionToInteger,
TrapCode::UnreachableCodeReached,
TrapCode::Interrupt,
];
#[test]
fn display() {
for r in &CODES {
let tc = *r;
assert_eq!(tc.to_string().parse(), Ok(tc));
}
assert_eq!("bogus".parse::<TrapCode>(), Err(()));
assert_eq!(TrapCode::User(17).to_string(), "user17");
assert_eq!("user22".parse(), Ok(TrapCode::User(22)));
assert_eq!("user".parse::<TrapCode>(), Err(()));
assert_eq!("user-1".parse::<TrapCode>(), Err(()));
assert_eq!("users".parse::<TrapCode>(), Err(()));
}
}

View File

@@ -0,0 +1,466 @@
//! Common types for the Cranelift code generator.
use core::default::Default;
use core::fmt::{self, Debug, Display, Formatter};
use target_lexicon::{PointerWidth, Triple};
/// The type of an SSA value.
///
/// The `INVALID` type isn't a real type, and is used as a placeholder in the IR where a type
/// field is present but no type is needed, such as the controlling type variable for a
/// non-polymorphic instruction.
///
/// Basic integer types: `I8`, `I16`, `I32`, and `I64`. These types are sign-agnostic.
///
/// Basic floating point types: `F32` and `F64`. IEEE single and double precision.
///
/// Boolean types: `B1`, `B8`, `B16`, `B32`, and `B64`. These all encode 'true' or 'false'. The
/// larger types use redundant bits.
///
/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
///
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Type(u8);
/// Not a valid type. Can't be loaded or stored. Can't be part of a SIMD vector.
pub const INVALID: Type = Type(0);
/// Start of the lane types. See also `meta-python/cdsl/types.py`.
const LANE_BASE: u8 = 0x70;
/// Start of the 2-lane vector types.
const VECTOR_BASE: u8 = LANE_BASE + 16;
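// In other words, the type byte encodes both the lane type and the lane count: the low nibble
// selects the lane type within the `LANE_BASE` bank, and each step of `0x10` above `LANE_BASE`
// doubles the lane count. `lane_type()`, `log2_lane_count()`, and `by()` below all rely on this
// layout.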
// Include code generated by `cranelift-codegen/meta/gen_types.rs`. This file contains constant
// definitions for all the scalar types as well as common vector types for 64, 128, 256, and
// 512-bit SIMD vectors.
include!(concat!(env!("OUT_DIR"), "/types.rs"));
impl Type {
/// Get the lane type of this SIMD vector type.
///
/// A lane type is the same as a SIMD vector type with one lane, so it returns itself.
pub fn lane_type(self) -> Self {
if self.0 < VECTOR_BASE {
self
} else {
Type(LANE_BASE | (self.0 & 0x0f))
}
}
/// Get log_2 of the number of bits in a lane.
pub fn log2_lane_bits(self) -> u8 {
match self.lane_type() {
B1 => 0,
B8 | I8 => 3,
B16 | I16 => 4,
B32 | I32 | F32 => 5,
B64 | I64 | F64 => 6,
_ => 0,
}
}
/// Get the number of bits in a lane.
pub fn lane_bits(self) -> u8 {
match self.lane_type() {
B1 => 1,
B8 | I8 => 8,
B16 | I16 => 16,
B32 | I32 | F32 => 32,
B64 | I64 | F64 => 64,
_ => 0,
}
}
/// Get an integer type with the requested number of bits.
pub fn int(bits: u16) -> Option<Self> {
match bits {
8 => Some(I8),
16 => Some(I16),
32 => Some(I32),
64 => Some(I64),
_ => None,
}
}
/// Get a type with the same number of lanes as `self`, but using `lane` as the lane type.
fn replace_lanes(self, lane: Self) -> Self {
debug_assert!(lane.is_lane() && !self.is_special());
Type((lane.0 & 0x0f) | (self.0 & 0xf0))
}
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
/// booleans of the same size.
///
/// Scalar types are treated as vectors with one lane, so they are converted to the multi-bit
/// boolean types.
pub fn as_bool_pedantic(self) -> Self {
// Replace the low 4 bits with the boolean version, preserve the high 4 bits.
self.replace_lanes(match self.lane_type() {
B8 | I8 => B8,
B16 | I16 => B16,
B32 | I32 | F32 => B32,
B64 | I64 | F64 => B64,
_ => B1,
})
}
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
/// booleans of the same size.
///
/// Scalar types are all converted to `b1` which is usually what you want.
pub fn as_bool(self) -> Self {
if !self.is_vector() {
B1
} else {
self.as_bool_pedantic()
}
}
/// Get a type with the same number of lanes as this type, but with lanes that are half the
/// number of bits.
pub fn half_width(self) -> Option<Self> {
Some(self.replace_lanes(match self.lane_type() {
I16 => I8,
I32 => I16,
I64 => I32,
F64 => F32,
B16 => B8,
B32 => B16,
B64 => B32,
_ => return None,
}))
}
/// Get a type with the same number of lanes as this type, but with lanes that are twice the
/// number of bits.
pub fn double_width(self) -> Option<Self> {
Some(self.replace_lanes(match self.lane_type() {
I8 => I16,
I16 => I32,
I32 => I64,
F32 => F64,
B8 => B16,
B16 => B32,
B32 => B64,
_ => return None,
}))
}
/// Is this the INVALID type?
pub fn is_invalid(self) -> bool {
self == INVALID
}
/// Is this a special type?
pub fn is_special(self) -> bool {
self.0 < LANE_BASE
}
/// Is this a lane type?
///
/// This is a scalar type that can also appear as the lane type of a SIMD vector.
pub fn is_lane(self) -> bool {
LANE_BASE <= self.0 && self.0 < VECTOR_BASE
}
/// Is this a SIMD vector type?
///
/// A vector type has 2 or more lanes.
pub fn is_vector(self) -> bool {
self.0 >= VECTOR_BASE
}
/// Is this a scalar boolean type?
pub fn is_bool(self) -> bool {
match self {
B1 | B8 | B16 | B32 | B64 => true,
_ => false,
}
}
/// Is this a scalar integer type?
pub fn is_int(self) -> bool {
match self {
I8 | I16 | I32 | I64 => true,
_ => false,
}
}
/// Is this a scalar floating point type?
pub fn is_float(self) -> bool {
match self {
F32 | F64 => true,
_ => false,
}
}
/// Is this a CPU flags type?
pub fn is_flags(self) -> bool {
match self {
IFLAGS | FFLAGS => true,
_ => false,
}
}
/// Get log_2 of the number of lanes in this SIMD vector type.
///
/// All SIMD types have a lane count that is a power of two and no larger than 256, so this
/// will be a number in the range 0-8.
///
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 0.
pub fn log2_lane_count(self) -> u8 {
self.0.saturating_sub(LANE_BASE) >> 4
}
/// Get the number of lanes in this SIMD vector type.
///
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
pub fn lane_count(self) -> u16 {
1 << self.log2_lane_count()
}
/// Get the total number of bits used to represent this type.
pub fn bits(self) -> u16 {
u16::from(self.lane_bits()) * self.lane_count()
}
/// Get the number of bytes used to store this type in memory.
pub fn bytes(self) -> u32 {
(u32::from(self.bits()) + 7) / 8
}
/// Get a SIMD vector type with `n` times more lanes than this one.
///
/// If this is a scalar type, this produces a SIMD type with this as a lane type and `n` lanes.
///
/// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
/// self.lane_count()` lanes.
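///
/// For example:
///
/// ```
/// use cranelift_codegen::ir::types::{I32, I32X4};
///
/// assert_eq!(I32.by(4), Some(I32X4));
/// // Lane counts must be powers of two.
/// assert_eq!(I32.by(3), None);
/// ```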
pub fn by(self, n: u16) -> Option<Self> {
if self.lane_bits() == 0 || !n.is_power_of_two() {
return None;
}
let log2_lanes: u32 = n.trailing_zeros();
let new_type = u32::from(self.0) + (log2_lanes << 4);
if new_type < 0x100 {
Some(Type(new_type as u8))
} else {
None
}
}
/// Get a SIMD vector with half the number of lanes.
///
/// There is no `double_vector()` method. Use `t.by(2)` instead.
pub fn half_vector(self) -> Option<Self> {
if self.is_vector() {
Some(Type(self.0 - 0x10))
} else {
None
}
}
/// Index of this type, for use with hash tables etc.
pub fn index(self) -> usize {
usize::from(self.0)
}
/// True iff:
///
/// 1. `self.lane_count() == other.lane_count()` and
/// 2. `self.lane_bits() >= other.lane_bits()`
pub fn wider_or_equal(self, other: Self) -> bool {
self.lane_count() == other.lane_count() && self.lane_bits() >= other.lane_bits()
}
/// Return the pointer type for the given target triple.
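///
/// For example (triples come from the `target_lexicon` crate):
///
/// ```
/// use cranelift_codegen::ir::{types, Type};
/// use target_lexicon::Triple;
///
/// let triple: Triple = "x86_64-unknown-linux-gnu".parse().unwrap();
/// assert_eq!(Type::triple_pointer_type(&triple), types::I64);
/// ```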
pub fn triple_pointer_type(triple: &Triple) -> Self {
match triple.pointer_width() {
Ok(PointerWidth::U16) => I16,
Ok(PointerWidth::U32) => I32,
Ok(PointerWidth::U64) => I64,
Err(()) => panic!("unable to determine architecture pointer width"),
}
}
}
impl Display for Type {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
if self.is_bool() {
write!(f, "b{}", self.lane_bits())
} else if self.is_int() {
write!(f, "i{}", self.lane_bits())
} else if self.is_float() {
write!(f, "f{}", self.lane_bits())
} else if self.is_vector() {
write!(f, "{}x{}", self.lane_type(), self.lane_count())
} else {
f.write_str(match *self {
IFLAGS => "iflags",
FFLAGS => "fflags",
INVALID => panic!("INVALID encountered"),
_ => panic!("Unknown Type(0x{:x})", self.0),
})
}
}
}
impl Debug for Type {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
if self.is_bool() {
write!(f, "types::B{}", self.lane_bits())
} else if self.is_int() {
write!(f, "types::I{}", self.lane_bits())
} else if self.is_float() {
write!(f, "types::F{}", self.lane_bits())
} else if self.is_vector() {
write!(f, "{:?}X{}", self.lane_type(), self.lane_count())
} else {
match *self {
INVALID => write!(f, "types::INVALID"),
IFLAGS => write!(f, "types::IFLAGS"),
FFLAGS => write!(f, "types::FFLAGS"),
_ => write!(f, "Type(0x{:x})", self.0),
}
}
}
}
impl Default for Type {
fn default() -> Self {
INVALID
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
#[test]
fn basic_scalars() {
assert_eq!(INVALID, INVALID.lane_type());
assert_eq!(0, INVALID.bits());
assert_eq!(IFLAGS, IFLAGS.lane_type());
assert_eq!(0, IFLAGS.bits());
assert_eq!(FFLAGS, FFLAGS.lane_type());
assert_eq!(0, FFLAGS.bits());
assert_eq!(B1, B1.lane_type());
assert_eq!(B8, B8.lane_type());
assert_eq!(B16, B16.lane_type());
assert_eq!(B32, B32.lane_type());
assert_eq!(B64, B64.lane_type());
assert_eq!(I8, I8.lane_type());
assert_eq!(I16, I16.lane_type());
assert_eq!(I32, I32.lane_type());
assert_eq!(I64, I64.lane_type());
assert_eq!(F32, F32.lane_type());
assert_eq!(F64, F64.lane_type());
assert_eq!(INVALID.lane_bits(), 0);
assert_eq!(IFLAGS.lane_bits(), 0);
assert_eq!(FFLAGS.lane_bits(), 0);
assert_eq!(B1.lane_bits(), 1);
assert_eq!(B8.lane_bits(), 8);
assert_eq!(B16.lane_bits(), 16);
assert_eq!(B32.lane_bits(), 32);
assert_eq!(B64.lane_bits(), 64);
assert_eq!(I8.lane_bits(), 8);
assert_eq!(I16.lane_bits(), 16);
assert_eq!(I32.lane_bits(), 32);
assert_eq!(I64.lane_bits(), 64);
assert_eq!(F32.lane_bits(), 32);
assert_eq!(F64.lane_bits(), 64);
}
#[test]
fn typevar_functions() {
assert_eq!(INVALID.half_width(), None);
assert_eq!(INVALID.half_width(), None);
assert_eq!(FFLAGS.half_width(), None);
assert_eq!(B1.half_width(), None);
assert_eq!(B8.half_width(), None);
assert_eq!(B16.half_width(), Some(B8));
assert_eq!(B32.half_width(), Some(B16));
assert_eq!(B64.half_width(), Some(B32));
assert_eq!(I8.half_width(), None);
assert_eq!(I16.half_width(), Some(I8));
assert_eq!(I32.half_width(), Some(I16));
assert_eq!(I32X4.half_width(), Some(I16X4));
assert_eq!(I64.half_width(), Some(I32));
assert_eq!(F32.half_width(), None);
assert_eq!(F64.half_width(), Some(F32));
assert_eq!(INVALID.double_width(), None);
assert_eq!(IFLAGS.double_width(), None);
assert_eq!(FFLAGS.double_width(), None);
assert_eq!(B1.double_width(), None);
assert_eq!(B8.double_width(), Some(B16));
assert_eq!(B16.double_width(), Some(B32));
assert_eq!(B32.double_width(), Some(B64));
assert_eq!(B64.double_width(), None);
assert_eq!(I8.double_width(), Some(I16));
assert_eq!(I16.double_width(), Some(I32));
assert_eq!(I32.double_width(), Some(I64));
assert_eq!(I32X4.double_width(), Some(I64X4));
assert_eq!(I64.double_width(), None);
assert_eq!(F32.double_width(), Some(F64));
assert_eq!(F64.double_width(), None);
}
#[test]
fn vectors() {
let big = F64.by(256).unwrap();
assert_eq!(big.lane_bits(), 64);
assert_eq!(big.lane_count(), 256);
assert_eq!(big.bits(), 64 * 256);
assert_eq!(big.half_vector().unwrap().to_string(), "f64x128");
assert_eq!(B1.by(2).unwrap().half_vector().unwrap().to_string(), "b1");
assert_eq!(I32.half_vector(), None);
assert_eq!(INVALID.half_vector(), None);
// Check that the generated constants match the computed vector types.
assert_eq!(I32.by(4), Some(I32X4));
assert_eq!(F64.by(8), Some(F64X8));
}
#[test]
fn format_scalars() {
assert_eq!(IFLAGS.to_string(), "iflags");
assert_eq!(FFLAGS.to_string(), "fflags");
assert_eq!(B1.to_string(), "b1");
assert_eq!(B8.to_string(), "b8");
assert_eq!(B16.to_string(), "b16");
assert_eq!(B32.to_string(), "b32");
assert_eq!(B64.to_string(), "b64");
assert_eq!(I8.to_string(), "i8");
assert_eq!(I16.to_string(), "i16");
assert_eq!(I32.to_string(), "i32");
assert_eq!(I64.to_string(), "i64");
assert_eq!(F32.to_string(), "f32");
assert_eq!(F64.to_string(), "f64");
}
#[test]
fn format_vectors() {
assert_eq!(B1.by(8).unwrap().to_string(), "b1x8");
assert_eq!(B8.by(1).unwrap().to_string(), "b8");
assert_eq!(B16.by(256).unwrap().to_string(), "b16x256");
assert_eq!(B32.by(4).unwrap().by(2).unwrap().to_string(), "b32x8");
assert_eq!(B64.by(8).unwrap().to_string(), "b64x8");
assert_eq!(I8.by(64).unwrap().to_string(), "i8x64");
assert_eq!(F64.by(2).unwrap().to_string(), "f64x2");
assert_eq!(I8.by(3), None);
assert_eq!(I8.by(512), None);
assert_eq!(INVALID.by(4), None);
}
#[test]
fn as_bool() {
assert_eq!(I32X4.as_bool(), B32X4);
assert_eq!(I32.as_bool(), B1);
assert_eq!(I32X4.as_bool_pedantic(), B32X4);
assert_eq!(I32.as_bool_pedantic(), B32);
}
}

View File

@@ -0,0 +1,161 @@
//! Value locations.
//!
//! The register allocator assigns every SSA value to either a register or a stack slot. This
//! assignment is represented by a `ValueLoc` object.
use crate::ir::StackSlot;
use crate::isa::{RegInfo, RegUnit};
use core::fmt;
/// Value location.
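///
/// A small usage sketch (assuming the re-export at `cranelift_codegen::ir::ValueLoc`):
///
/// ```
/// use cranelift_codegen::ir::ValueLoc;
///
/// let loc = ValueLoc::Reg(12); // register unit numbers are ISA-specific
/// assert!(loc.is_assigned());
/// assert_eq!(loc.unwrap_reg(), 12);
/// ```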
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ValueLoc {
/// This value has not been assigned to a location yet.
Unassigned,
/// Value is assigned to a register.
Reg(RegUnit),
/// Value is assigned to a stack slot.
Stack(StackSlot),
}
impl Default for ValueLoc {
fn default() -> Self {
ValueLoc::Unassigned
}
}
impl ValueLoc {
/// Is this an assigned location? (That is, not `Unassigned`).
pub fn is_assigned(self) -> bool {
match self {
ValueLoc::Unassigned => false,
_ => true,
}
}
/// Get the register unit of this location, or panic.
pub fn unwrap_reg(self) -> RegUnit {
match self {
ValueLoc::Reg(ru) => ru,
_ => panic!("Expected register: {:?}", self),
}
}
/// Get the stack slot of this location, or panic.
pub fn unwrap_stack(self) -> StackSlot {
match self {
ValueLoc::Stack(ss) => ss,
_ => panic!("Expected stack slot: {:?}", self),
}
}
/// Return an object that can display this value location, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayValueLoc<'a> {
DisplayValueLoc(self, regs.into())
}
}
/// Displaying a `ValueLoc` correctly requires the associated `RegInfo` from the target ISA.
/// Without the register info, register units are simply shown as numbers.
///
/// The `DisplayValueLoc` type can display the contained `ValueLoc`.
pub struct DisplayValueLoc<'a>(ValueLoc, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayValueLoc<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
ValueLoc::Unassigned => write!(f, "-"),
ValueLoc::Reg(ru) => match self.1 {
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
None => write!(f, "%{}", ru),
},
ValueLoc::Stack(ss) => write!(f, "{}", ss),
}
}
}
/// Function argument location.
///
/// The ABI specifies how arguments are passed to a function, and where return values appear after
/// the call. Just like a `ValueLoc`, function arguments can be passed in registers or on the
/// stack.
///
/// Function arguments on the stack are accessed differently for the incoming arguments to the
/// current function and the outgoing arguments to a called external function. For this reason,
/// the location of stack arguments is described as an offset into the array of function arguments
/// on the stack.
///
/// An `ArgumentLoc` can be translated to a `ValueLoc` only when we know if we're talking about an
/// incoming argument or an outgoing argument.
///
/// - For stack arguments, different `StackSlot` entities are used to represent incoming and
/// outgoing arguments.
/// - For register arguments, there is usually no difference, but if we ever add support for a
/// register-window ISA like SPARC, register arguments would also need to be translated.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ArgumentLoc {
/// This argument has not been assigned to a location yet.
Unassigned,
/// Argument is passed in a register.
Reg(RegUnit),
/// Argument is passed on the stack, at the given byte offset into the argument array.
Stack(i32),
}
impl Default for ArgumentLoc {
fn default() -> Self {
ArgumentLoc::Unassigned
}
}
impl ArgumentLoc {
/// Is this an assigned location? (That is, not `Unassigned`).
pub fn is_assigned(self) -> bool {
match self {
ArgumentLoc::Unassigned => false,
_ => true,
}
}
/// Is this a register location?
pub fn is_reg(self) -> bool {
match self {
ArgumentLoc::Reg(_) => true,
_ => false,
}
}
/// Is this a stack location?
pub fn is_stack(self) -> bool {
match self {
ArgumentLoc::Stack(_) => true,
_ => false,
}
}
/// Return an object that can display this argument location, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayArgumentLoc<'a> {
DisplayArgumentLoc(self, regs.into())
}
}
/// Displaying an `ArgumentLoc` correctly requires the associated `RegInfo` from the target ISA.
/// Without the register info, register units are simply shown as numbers.
///
/// The `DisplayArgumentLoc` type can display the contained `ArgumentLoc`.
pub struct DisplayArgumentLoc<'a>(ArgumentLoc, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayArgumentLoc<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
ArgumentLoc::Unassigned => write!(f, "-"),
ArgumentLoc::Reg(ru) => match self.1 {
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
None => write!(f, "%{}", ru),
},
ArgumentLoc::Stack(offset) => write!(f, "{}", offset),
}
}
}

View File

@@ -0,0 +1,35 @@
//! ARM 32-bit ABI implementation.
use super::registers::{D, GPR, Q, S};
use crate::ir;
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use crate::settings as shared_settings;
/// Legalize `sig`.
pub fn legalize_signature(
_sig: &mut ir::Signature,
_flags: &shared_settings::Flags,
_current: bool,
) {
unimplemented!()
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() {
GPR
} else {
match ty.bits() {
32 => S,
64 => D,
128 => Q,
_ => panic!("Unexpected {} ABI type for arm32", ty),
}
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}

View File

@@ -0,0 +1,7 @@
//! Emitting binary ARM32 machine code.
use crate::binemit::{bad_encoding, CodeSink};
use crate::ir::{Function, Inst};
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-arm32.rs"));

View File

@@ -0,0 +1,9 @@
//! Encoding tables for ARM32 ISA.
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::RecipeSizing;
include!(concat!(env!("OUT_DIR"), "/encoding-arm32.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-arm32.rs"));

View File

@@ -0,0 +1,136 @@
//! ARM 32-bit Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use core::fmt;
use std::boxed::Box;
use target_lexicon::{Architecture, Triple};
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
/// Get an ISA builder for creating ARM32 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
let level1 = match triple.architecture {
Architecture::Thumbv6m | Architecture::Thumbv7em | Architecture::Thumbv7m => {
&enc_tables::LEVEL1_T32[..]
}
Architecture::Arm
| Architecture::Armv4t
| Architecture::Armv5te
| Architecture::Armv7
| Architecture::Armv7s => &enc_tables::LEVEL1_A32[..],
_ => panic!("unsupported architecture for arm32"),
};
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
cpumode: level1,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"arm32"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
self.cpumode,
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.shared_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}

View File

@@ -0,0 +1,68 @@
//! ARM32 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
#[cfg(test)]
mod tests {
use super::{D, GPR, INFO, S};
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("s0"), Some(0));
assert_eq!(INFO.parse_regunit("s31"), Some(31));
assert_eq!(INFO.parse_regunit("s32"), Some(32));
assert_eq!(INFO.parse_regunit("r0"), Some(64));
assert_eq!(INFO.parse_regunit("r15"), Some(79));
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%s0");
assert_eq!(uname(1), "%s1");
assert_eq!(uname(31), "%s31");
assert_eq!(uname(64), "%r0");
}
#[test]
fn overlaps() {
// arm32 has the most interesting register geometries, so test `regs_overlap()` here.
use crate::isa::regs_overlap;
let r0 = GPR.unit(0);
let r1 = GPR.unit(1);
let r2 = GPR.unit(2);
assert!(regs_overlap(GPR, r0, GPR, r0));
assert!(regs_overlap(GPR, r2, GPR, r2));
assert!(!regs_overlap(GPR, r0, GPR, r1));
assert!(!regs_overlap(GPR, r1, GPR, r0));
assert!(!regs_overlap(GPR, r2, GPR, r1));
assert!(!regs_overlap(GPR, r1, GPR, r2));
let s0 = S.unit(0);
let s1 = S.unit(1);
let s2 = S.unit(2);
let s3 = S.unit(3);
let d0 = D.unit(0);
let d1 = D.unit(1);
assert!(regs_overlap(S, s0, D, d0));
assert!(regs_overlap(S, s1, D, d0));
assert!(!regs_overlap(S, s0, D, d1));
assert!(!regs_overlap(S, s1, D, d1));
assert!(regs_overlap(S, s2, D, d1));
assert!(regs_overlap(S, s3, D, d1));
assert!(!regs_overlap(D, d1, S, s1));
assert!(regs_overlap(D, d1, S, s2));
assert!(!regs_overlap(D, d0, D, d1));
assert!(regs_overlap(D, d1, D, d1));
}
}

View File

@@ -0,0 +1,9 @@
//! ARM32 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/arm32/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs"));

View File

@@ -0,0 +1,30 @@
//! ARM 64-bit ABI implementation.
use super::registers::{FPR, GPR};
use crate::ir;
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use crate::settings as shared_settings;
/// Legalize `sig`.
pub fn legalize_signature(
_sig: &mut ir::Signature,
_flags: &shared_settings::Flags,
_current: bool,
) {
unimplemented!()
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() {
GPR
} else {
FPR
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}

View File

@@ -0,0 +1,7 @@
//! Emitting binary ARM64 machine code.
use crate::binemit::{bad_encoding, CodeSink};
use crate::ir::{Function, Inst};
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));

View File

@@ -0,0 +1,9 @@
//! Encoding tables for ARM64 ISA.
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::RecipeSizing;
include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));

View File

@@ -0,0 +1,123 @@
//! ARM 64-bit Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use core::fmt;
use std::boxed::Box;
use target_lexicon::Triple;
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
}
/// Get an ISA builder for creating ARM64 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"arm64"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
&enc_tables::LEVEL1_A64[..],
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.shared_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}

View File

@@ -0,0 +1,39 @@
//! ARM64 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs"));
#[cfg(test)]
mod tests {
use super::INFO;
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("x0"), Some(0));
assert_eq!(INFO.parse_regunit("x31"), Some(31));
assert_eq!(INFO.parse_regunit("v0"), Some(32));
assert_eq!(INFO.parse_regunit("v31"), Some(63));
assert_eq!(INFO.parse_regunit("x32"), None);
assert_eq!(INFO.parse_regunit("v32"), None);
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%x0");
assert_eq!(uname(1), "%x1");
assert_eq!(uname(31), "%x31");
assert_eq!(uname(32), "%v0");
assert_eq!(uname(33), "%v1");
assert_eq!(uname(63), "%v31");
assert_eq!(uname(64), "%nzcv");
assert_eq!(uname(65), "%INVALID65");
}
}

View File

@@ -0,0 +1,9 @@
//! ARM64 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/arm64/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));

View File

@@ -0,0 +1,60 @@
use core::fmt;
use core::str;
use target_lexicon::{CallingConvention, Triple};
/// Calling convention identifiers.
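///
/// Identifiers round-trip through their textual names, for example:
///
/// ```
/// use cranelift_codegen::isa::CallConv;
///
/// assert_eq!(CallConv::SystemV.to_string(), "system_v");
/// assert_eq!("fast".parse(), Ok(CallConv::Fast));
/// ```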
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum CallConv {
/// Best performance, not ABI-stable
Fast,
/// Smallest caller code size, not ABI-stable
Cold,
/// System V-style convention used on many platforms
SystemV,
/// Windows "fastcall" convention, also used for x64 and ARM
WindowsFastcall,
/// SpiderMonkey WebAssembly convention
Baldrdash,
/// Specialized convention for the probestack function
Probestack,
}
impl CallConv {
/// Return the default calling convention for the given target triple.
pub fn triple_default(triple: &Triple) -> Self {
match triple.default_calling_convention() {
// Default to System V for unknown targets because most everything
// uses System V.
Ok(CallingConvention::SystemV) | Err(()) => CallConv::SystemV,
Ok(CallingConvention::WindowsFastcall) => CallConv::WindowsFastcall,
}
}
}
impl fmt::Display for CallConv {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(match *self {
CallConv::Fast => "fast",
CallConv::Cold => "cold",
CallConv::SystemV => "system_v",
CallConv::WindowsFastcall => "windows_fastcall",
CallConv::Baldrdash => "baldrdash",
CallConv::Probestack => "probestack",
})
}
}
impl str::FromStr for CallConv {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"fast" => Ok(CallConv::Fast),
"cold" => Ok(CallConv::Cold),
"system_v" => Ok(CallConv::SystemV),
"windows_fastcall" => Ok(CallConv::WindowsFastcall),
"baldrdash" => Ok(CallConv::Baldrdash),
"probestack" => Ok(CallConv::Probestack),
_ => Err(()),
}
}
}

View File

@@ -0,0 +1,207 @@
//! Register constraints for instruction operands.
//!
//! An encoding recipe specifies how an instruction is encoded as binary machine code, but it only
//! works if the operands and results satisfy certain constraints. Constraints on immediate
//! operands are checked by instruction predicates when the recipe is chosen.
//!
//! It is the register allocator's job to make sure that the register constraints on value operands
//! are satisfied.
use crate::binemit::CodeOffset;
use crate::ir::{Function, Inst, ValueLoc};
use crate::isa::{RegClass, RegUnit};
use crate::regalloc::RegDiversions;
/// Register constraint for a single value operand or instruction result.
#[derive(PartialEq, Debug)]
pub struct OperandConstraint {
/// The kind of constraint.
pub kind: ConstraintKind,
/// The register class of the operand.
///
/// This applies to all kinds of constraints, but with slightly different meaning.
pub regclass: RegClass,
}
impl OperandConstraint {
/// Check if this operand constraint is satisfied by the given value location.
/// For tied constraints, this only checks the register class, not that the
/// counterpart operand has the same value location.
pub fn satisfied(&self, loc: ValueLoc) -> bool {
match self.kind {
ConstraintKind::Reg | ConstraintKind::Tied(_) => {
if let ValueLoc::Reg(reg) = loc {
self.regclass.contains(reg)
} else {
false
}
}
ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => {
loc == ValueLoc::Reg(reg) && self.regclass.contains(reg)
}
ConstraintKind::Stack => {
if let ValueLoc::Stack(_) = loc {
true
} else {
false
}
}
}
}
}
/// The different kinds of operand constraints.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum ConstraintKind {
/// This operand or result must be a register from the given register class.
Reg,
/// This operand or result must be a fixed register.
///
/// The constraint's `regclass` field is the top-level register class containing the fixed
/// register.
FixedReg(RegUnit),
/// This result value must use the same register as an input value operand.
///
/// The associated number is the index of the input value operand this result is tied to. The
/// constraint's `regclass` field is the same as the tied operand's register class.
///
/// When an (in, out) operand pair is tied, this constraint kind appears in both the `ins` and
/// the `outs` arrays. The constraint for the in operand is `Tied(out)`, and the constraint for
/// the out operand is `Tied(in)`.
Tied(u8),
/// This operand must be a fixed register, and it has a tied counterpart.
///
/// This works just like `FixedReg`, but additionally indicates that there are identical
/// input/output operands for this fixed register. For an input operand, this means that the
/// value will be clobbered by the instruction.
FixedTied(RegUnit),
/// This operand must be a value in a stack slot.
///
/// The constraint's `regclass` field is the register class that would normally be used to load
/// and store values of this type.
Stack,
}
/// Value operand constraints for an encoding recipe.
#[derive(PartialEq, Clone)]
pub struct RecipeConstraints {
/// Constraints for the instruction's fixed value operands.
///
/// If the instruction takes a variable number of operands, the register constraints for those
/// operands must be computed dynamically.
///
/// - For branches and jumps, EBB arguments must match the expectations of the destination EBB.
/// - For calls and returns, the calling convention ABI specifies constraints.
pub ins: &'static [OperandConstraint],
/// Constraints for the instruction's fixed results.
///
/// If the instruction produces a variable number of results, it's probably a call and the
/// constraints must be derived from the calling convention ABI.
pub outs: &'static [OperandConstraint],
/// Are any of the input constraints `FixedReg`?
pub fixed_ins: bool,
/// Are any of the output constraints `FixedReg`?
pub fixed_outs: bool,
/// Are there any tied operands?
pub tied_ops: bool,
/// Does this instruction clobber the CPU flags?
///
/// When true, SSA values of type `iflags` or `fflags` cannot be live across the instruction.
pub clobbers_flags: bool,
}
impl RecipeConstraints {
/// Check that these constraints are satisfied by the operands on `inst`.
pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
let loc = divert.get(arg, &func.locations);
if let ConstraintKind::Tied(out_index) = constraint.kind {
let out_val = func.dfg.inst_results(inst)[out_index as usize];
let out_loc = func.locations[out_val];
if loc != out_loc {
return false;
}
}
if !constraint.satisfied(loc) {
return false;
}
}
for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
let loc = divert.get(arg, &func.locations);
if !constraint.satisfied(loc) {
return false;
}
}
true
}
}
/// Constraints on the range of a branch instruction.
///
/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.
/// The origin depends on the ISA and the specific instruction:
///
/// - RISC-V and ARM AArch64 use the address of the branch instruction, `origin = 0`.
/// - x86 uses the address of the instruction following the branch, `origin = 2` for a 2-byte
/// branch instruction.
/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
#[derive(Clone, Copy, Debug)]
pub struct BranchRange {
/// Offset in bytes from the address of the branch instruction to the origin used for computing
/// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
pub origin: u8,
/// Number of bits in the signed byte displacement encoded in the instruction. This does not
/// account for branches that can only target aligned addresses.
pub bits: u8,
}
impl BranchRange {
/// Determine if this branch range can represent the range from `branch` to `dest`, where
/// `branch` is the code offset of the branch instruction itself and `dest` is the code offset
/// of the destination EBB header.
///
/// This method does not detect if the range is larger than 2 GB.
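///
/// For example, a Thumb T1 branch (`origin = 4`, `bits = 9`) reaches targets within
/// `[-256, +255]` bytes of its origin:
///
/// ```
/// use cranelift_codegen::isa::BranchRange;
///
/// let t1 = BranchRange { origin: 4, bits: 9 };
/// assert!(t1.contains(1000, 1258)); // 1258 - (1000 + 4) = 254: in range.
/// assert!(!t1.contains(1000, 1260)); // 256 is out of range.
/// ```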
pub fn contains(self, branch: CodeOffset, dest: CodeOffset) -> bool {
let d = dest.wrapping_sub(branch + CodeOffset::from(self.origin)) as i32;
let s = 32 - self.bits;
d == d << s >> s
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn branch_range() {
// ARM T1 branch.
let t1 = BranchRange { origin: 4, bits: 9 };
assert!(t1.contains(0, 0));
assert!(t1.contains(0, 2));
assert!(t1.contains(2, 0));
assert!(t1.contains(1000, 1000));
// Forward limit.
assert!(t1.contains(1000, 1258));
assert!(!t1.contains(1000, 1260));
// Backward limit
assert!(t1.contains(1000, 748));
assert!(!t1.contains(1000, 746));
}
}

View File

@@ -0,0 +1,292 @@
//! Support types for generated encoding tables.
//!
//! This module contains types and functions for working with the encoding tables generated by
//! `cranelift-codegen/meta-python/gen_encoding.py`.
use crate::constant_hash::{probe, Table};
use crate::ir::{Function, InstructionData, Opcode, Type};
use crate::isa::{Encoding, Legalize};
use crate::settings::PredicateView;
use core::ops::Range;
/// A recipe predicate.
///
/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
///
/// A `None` predicate is always satisfied.
pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
/// An instruction predicate.
///
/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
/// can't depend on ISA settings.
pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
/// Legalization action to perform when no encoding can be found for an instruction.
///
/// This is an index into an ISA-specific table of legalization actions.
pub type LegalizeCode = u8;
/// Level 1 hash table entry.
///
/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
/// variable, using `INVALID` for non-polymorphic instructions.
///
/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
/// have a power-of-two size.
///
/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
/// size of the `LEVEL2` table.
///
/// Empty entries are encoded with a `!0` value for `log2len`, which will always be out of range.
/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of
/// bounds.
pub struct Level1Entry<OffT: Into<u32> + Copy> {
/// The controlling type variable for this entry.
pub ty: Type,
/// Binary logarithm of the length of the level 2 table, or `!0` for an empty entry.
pub log2len: u8,
/// Default legalization action for this type.
pub legalize: LegalizeCode,
/// Offset into `LEVEL2` where this entry's level 2 table begins.
pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Level1Entry<OffT> {
/// Get the level 2 table range indicated by this entry.
fn range(&self) -> Range<usize> {
let b = self.offset.into() as usize;
b..b + (1 << self.log2len)
}
}
impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<Type> {
if self[idx].log2len != !0 {
Some(self[idx].ty)
} else {
None
}
}
}
/// Level 2 hash table entry.
///
/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
/// table where the encoding recipes for the instruction are stored.
///
/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
/// bits.
///
/// Empty entries are encoded with `None` in the `opcode` field.
pub struct Level2Entry<OffT: Into<u32> + Copy> {
/// The instruction opcode, or `None` for an empty entry.
pub opcode: Option<Opcode>,
/// Offset into `ENCLISTS` where this instruction's encoding list begins.
pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<Opcode> {
self[idx].opcode
}
}
/// Two-level hash table lookup and iterator construction.
///
/// Given the controlling type variable and instruction opcode, find the corresponding encoding
/// list.
///
/// Returns an iterator that produces legal encodings for `inst`.
pub fn lookup_enclist<'a, OffT1, OffT2>(
ctrl_typevar: Type,
inst: &'a InstructionData,
func: &'a Function,
level1_table: &'static [Level1Entry<OffT1>],
level2_table: &'static [Level2Entry<OffT2>],
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
) -> Encodings<'a>
where
OffT1: Into<u32> + Copy,
OffT2: Into<u32> + Copy,
{
let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) {
Err(l1idx) => {
// No level 1 entry found for the type.
// We have a sentinel entry with the default legalization code.
(!0, level1_table[l1idx].legalize)
}
Ok(l1idx) => {
// We have a valid level 1 entry for this type.
let l1ent = &level1_table[l1idx];
let offset = match level2_table.get(l1ent.range()) {
Some(l2tab) => {
let opcode = inst.opcode();
match probe(l2tab, opcode, opcode as usize) {
Ok(l2idx) => l2tab[l2idx].offset.into() as usize,
Err(_) => !0,
}
}
// The l1ent range is invalid. This means that we just have a customized
// legalization code for this type. The level 2 table is empty.
None => !0,
};
(offset, l1ent.legalize)
}
};
// Now we have an offset into `enclist` that is `!0` when no encoding list could be found.
// The default legalization code is always valid.
Encodings::new(
offset,
legalize,
inst,
func,
enclist,
legalize_actions,
recipe_preds,
inst_preds,
isa_preds,
)
}
/// Encoding list entry.
///
/// Encoding lists are represented as sequences of u16 words.
pub type EncListEntry = u16;
/// Number of bits used to represent a predicate. Cf. `meta-python/gen_encoding.py`.
const PRED_BITS: u8 = 12;
const PRED_MASK: usize = (1 << PRED_BITS) - 1;
/// First code word representing a predicate check. Cf. `meta-python/gen_encoding.py`.
const PRED_START: usize = 0x1000;
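// An encoding list is a sequence of `EncListEntry` words decoded by `Encodings::next` below:
//
// - A word with `word >> 1 < recipe_preds.len()` is a "recipe+bits" pair: the recipe index is
//   `word >> 1`, the next word holds the encoding bits, and bit 0 tells whether this is the
//   last entry in the list.
// - Any other word below `PRED_START` means "stop and legalize"; the legalization code is the
//   word minus `2 * recipe_preds.len()`.
// - A word at or above `PRED_START` is a predicate check: the low `PRED_BITS` bits index an
//   instruction or ISA predicate, and the remaining bits give the number of words to skip when
//   the predicate fails (0 meaning stop).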
/// An iterator over legal encodings for the instruction.
pub struct Encodings<'a> {
// Current offset into `enclist`, or out of bounds after we've reached the end.
offset: usize,
// Legalization code to use if no encoding is found.
legalize: LegalizeCode,
inst: &'a InstructionData,
func: &'a Function,
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
}
impl<'a> Encodings<'a> {
/// Creates a new instance of `Encodings`.
///
/// This iterator searches for encodings that apply to the given instruction. The encoding
/// lists are laid out such that the first call to `next` returns a valid entry in the list or
/// `None`.
pub fn new(
offset: usize,
legalize: LegalizeCode,
inst: &'a InstructionData,
func: &'a Function,
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
) -> Self {
Encodings {
offset,
inst,
func,
legalize,
isa_preds,
recipe_preds,
inst_preds,
enclist,
legalize_actions,
}
}
/// Get the legalization action that caused the enumeration of encodings to stop.
/// This can be the default legalization action for the type or a custom code for the
/// instruction.
///
/// This method must only be called after the iterator returns `None`.
pub fn legalize(&self) -> Legalize {
debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
self.legalize_actions[self.legalize as usize]
}
/// Check if the `rpred` recipe predicate is satisfied.
fn check_recipe(&self, rpred: RecipePredicate) -> bool {
match rpred {
Some(p) => p(self.isa_preds, self.inst),
None => true,
}
}
/// Check an instruction or isa predicate.
fn check_pred(&self, pred: usize) -> bool {
if let Some(&p) = self.inst_preds.get(pred) {
p(self.func, self.inst)
} else {
let pred = pred - self.inst_preds.len();
self.isa_preds.test(pred)
}
}
}
impl<'a> Iterator for Encodings<'a> {
type Item = Encoding;
fn next(&mut self) -> Option<Encoding> {
while let Some(entryref) = self.enclist.get(self.offset) {
let entry = *entryref as usize;
// Check for "recipe+bits".
let recipe = entry >> 1;
if let Some(&rpred) = self.recipe_preds.get(recipe) {
let bits = self.offset + 1;
if entry & 1 == 0 {
self.offset += 2; // Next entry.
} else {
self.offset = !0; // Stop.
}
if self.check_recipe(rpred) {
return Some(Encoding::new(recipe as u16, self.enclist[bits]));
}
continue;
}
// Check for "stop with legalize".
if entry < PRED_START {
self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
self.offset = !0; // Stop.
return None;
}
// Finally, this must be a predicate entry.
let pred_entry = entry - PRED_START;
let skip = pred_entry >> PRED_BITS;
let pred = pred_entry & PRED_MASK;
if self.check_pred(pred) {
self.offset += 1;
} else if skip == 0 {
self.offset = !0; // Stop.
return None;
} else {
self.offset += 1 + skip;
}
}
None
}
}

View File

@@ -0,0 +1,157 @@
//! The `Encoding` struct.
use crate::binemit::CodeOffset;
use crate::ir::{Function, Inst};
use crate::isa::constraints::{BranchRange, RecipeConstraints};
use crate::regalloc::RegDiversions;
use core::fmt;
/// Bits needed to encode an instruction as binary machine code.
///
/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
/// encoding *bits*. The recipe determines the native instruction format and the mapping of
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
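///
/// A small usage sketch:
///
/// ```
/// use cranelift_codegen::isa::Encoding;
///
/// let enc = Encoding::new(3, 0x1f);
/// assert_eq!(enc.recipe(), 3);
/// assert_eq!(enc.bits(), 0x1f);
/// assert!(enc.is_legal());
/// // The default encoding is the illegal placeholder.
/// assert!(!Encoding::default().is_legal());
/// ```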
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Encoding {
recipe: u16,
bits: u16,
}
impl Encoding {
/// Create a new `Encoding` containing `(recipe, bits)`.
pub fn new(recipe: u16, bits: u16) -> Self {
Self { recipe, bits }
}
/// Get the recipe number in this encoding.
pub fn recipe(self) -> usize {
self.recipe as usize
}
/// Get the recipe-specific encoding bits.
pub fn bits(self) -> u16 {
self.bits
}
/// Is this a legal encoding, or the default placeholder?
pub fn is_legal(self) -> bool {
self != Self::default()
}
}
/// The default encoding is the illegal one.
impl Default for Encoding {
fn default() -> Self {
Self::new(0xffff, 0xffff)
}
}
/// ISA-independent display of an encoding.
impl fmt::Display for Encoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.is_legal() {
write!(f, "{}#{:02x}", self.recipe, self.bits)
} else {
write!(f, "-")
}
}
}
/// Temporary object that holds enough context to properly display an encoding.
/// This is meant to be created by `EncInfo::display()`.
pub struct DisplayEncoding {
pub encoding: Encoding,
pub recipe_names: &'static [&'static str],
}
impl fmt::Display for DisplayEncoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.encoding.is_legal() {
write!(
f,
"{}#{:02x}",
self.recipe_names[self.encoding.recipe()],
self.encoding.bits
)
} else {
write!(f, "-")
}
}
}
type SizeCalculatorFn = fn(&RecipeSizing, Inst, &RegDiversions, &Function) -> u8;
/// Returns the base size of the recipe, which is assumed to be fixed. This is the default for
/// most encodings; others can be variable and longer than this base size, depending on the
/// registers they use, and compute their size with a different, platform-specific function.
pub fn base_size(sizing: &RecipeSizing, _: Inst, _: &RegDiversions, _: &Function) -> u8 {
sizing.base_size
}
/// Code size information for an encoding recipe.
///
/// Most encoding recipes correspond to an exact instruction size; others compute the size of
/// each instruction with their `compute_size` callback.
pub struct RecipeSizing {
/// Size in bytes of instructions encoded with this recipe.
pub base_size: u8,
/// Method computing the real instruction's size, given inputs and outputs.
pub compute_size: SizeCalculatorFn,
/// Allowed branch range in this recipe, if any.
///
/// All encoding recipes for branches have exact branch range information.
pub branch_range: Option<BranchRange>,
}
/// Information about all the encodings in this ISA.
#[derive(Clone)]
pub struct EncInfo {
/// Constraints on value operands per recipe.
pub constraints: &'static [RecipeConstraints],
/// Code size information per recipe.
pub sizing: &'static [RecipeSizing],
/// Names of encoding recipes.
pub names: &'static [&'static str],
}
impl EncInfo {
/// Get the value operand constraints for `enc` if it is a legal encoding.
pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
self.constraints.get(enc.recipe())
}
/// Create an object that can display an ISA-dependent encoding properly.
pub fn display(&self, enc: Encoding) -> DisplayEncoding {
DisplayEncoding {
encoding: enc,
recipe_names: self.names,
}
}
/// Get the precise size in bytes of instructions encoded with `enc`.
///
/// Returns 0 for illegal encodings.
pub fn byte_size(
&self,
enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> CodeOffset {
self.sizing.get(enc.recipe()).map_or(0, |s| {
let compute_size = s.compute_size;
CodeOffset::from(compute_size(&s, inst, divert, func))
})
}
/// Get the branch range that is supported by `enc`, if any.
///
/// This will never return `None` for a legal branch encoding.
pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
}
}

View File

@@ -0,0 +1,375 @@
//! Instruction Set Architectures.
//!
//! The `isa` module provides a `TargetIsa` trait which provides the behavior specialization needed
//! by the ISA-independent code generator. The sub-modules of this module provide definitions for
//! the instruction sets that Cranelift can target. Each sub-module has its own implementation of
//! `TargetIsa`.
//!
//! # Constructing a `TargetIsa` instance
//!
//! The target ISA is built from the following information:
//!
//! - The name of the target ISA as a string. Cranelift is a cross-compiler, so the ISA to target
//! can be selected dynamically. Individual ISAs can be left out when Cranelift is compiled, so a
//! string is used to identify the proper sub-module.
//! - Values for settings that apply to all ISAs. This is represented by a `settings::Flags`
//! instance.
//! - Values for ISA-specific settings.
//!
//! The `isa::lookup()` function is the main entry point which returns an `isa::Builder`
//! appropriate for the requested ISA:
//!
//! ```
//! # extern crate cranelift_codegen;
//! # #[macro_use] extern crate target_lexicon;
//! # fn main() {
//! use cranelift_codegen::isa;
//! use cranelift_codegen::settings::{self, Configurable};
//! use std::str::FromStr;
//! use target_lexicon::Triple;
//!
//! let shared_builder = settings::builder();
//! let shared_flags = settings::Flags::new(shared_builder);
//!
//! match isa::lookup(triple!("riscv32")) {
//! Err(_) => {
//! // The RISC-V target ISA is not available.
//! }
//! Ok(mut isa_builder) => {
//! isa_builder.set("supports_m", "on");
//! let isa = isa_builder.finish(shared_flags);
//! }
//! }
//! # }
//! ```
//!
//! The configured target ISA trait object is a `Box<TargetIsa>` which can be used for multiple
//! concurrent function compilations.
pub use crate::isa::call_conv::CallConv;
pub use crate::isa::constraints::{
BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
};
pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
use crate::isa::enc_tables::Encodings;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
use crate::settings::SetResult;
use crate::timing;
use core::fmt;
use failure_derive::Fail;
use std::boxed::Box;
use target_lexicon::{Architecture, PointerWidth, Triple};
#[cfg(build_riscv)]
mod riscv;
#[cfg(build_x86)]
mod x86;
#[cfg(build_arm32)]
mod arm32;
#[cfg(build_arm64)]
mod arm64;
mod call_conv;
mod constraints;
mod enc_tables;
mod encoding;
pub mod registers;
mod stack;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::Unsupported)` if not enabled.
macro_rules! isa_builder {
($module:ident, $name:ident) => {{
#[cfg($name)]
fn $name(triple: Triple) -> Result<Builder, LookupError> {
Ok($module::isa_builder(triple))
};
#[cfg(not($name))]
fn $name(_triple: Triple) -> Result<Builder, LookupError> {
Err(LookupError::Unsupported)
}
$name
}};
}
/// Look for a supported ISA with the given `name`.
/// Return a builder that can create a corresponding `TargetIsa`.
pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
match triple.architecture {
Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, build_riscv)(triple),
Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => {
isa_builder!(x86, build_x86)(triple)
}
Architecture::Thumbv6m
| Architecture::Thumbv7em
| Architecture::Thumbv7m
| Architecture::Arm
| Architecture::Armv4t
| Architecture::Armv5te
| Architecture::Armv7
| Architecture::Armv7s => isa_builder!(arm32, build_arm32)(triple),
Architecture::Aarch64 => isa_builder!(arm64, build_arm64)(triple),
_ => Err(LookupError::Unsupported),
}
}
/// Describes the reason for a target lookup failure
#[derive(Fail, PartialEq, Eq, Copy, Clone, Debug)]
pub enum LookupError {
/// Support for this target was disabled in the current build.
#[fail(display = "Support for this target is disabled")]
SupportDisabled,
/// Support for this target has not yet been implemented.
#[fail(display = "Support for this target has not been implemented yet")]
Unsupported,
}
/// Builder for a `TargetIsa`.
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
pub struct Builder {
triple: Triple,
setup: settings::Builder,
constructor: fn(Triple, settings::Flags, settings::Builder) -> Box<TargetIsa>,
}
impl Builder {
/// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
/// fully configured `TargetIsa` trait object.
pub fn finish(self, shared_flags: settings::Flags) -> Box<TargetIsa> {
(self.constructor)(self.triple, shared_flags, self.setup)
}
}
impl settings::Configurable for Builder {
fn set(&mut self, name: &str, value: &str) -> SetResult<()> {
self.setup.set(name, value)
}
fn enable(&mut self, name: &str) -> SetResult<()> {
self.setup.enable(name)
}
}
/// After determining that an instruction doesn't have an encoding, how should we proceed to
/// legalize it?
///
/// The `Encodings` iterator returns a legalization function to call.
pub type Legalize =
fn(ir::Inst, &mut ir::Function, &mut flowgraph::ControlFlowGraph, &TargetIsa) -> bool;
/// This struct provides information that a frontend may need to know about a target to
/// produce Cranelift IR for the target.
#[derive(Clone, Copy)]
pub struct TargetFrontendConfig {
/// The default calling convention of the target.
pub default_call_conv: CallConv,
/// The pointer width of the target.
pub pointer_width: PointerWidth,
}
impl TargetFrontendConfig {
/// Get the pointer type of this target.
pub fn pointer_type(self) -> ir::Type {
ir::Type::int(u16::from(self.pointer_bits())).unwrap()
}
/// Get the width of pointers on this target, in units of bits.
pub fn pointer_bits(self) -> u8 {
self.pointer_width.bits()
}
/// Get the width of pointers on this target, in units of bytes.
pub fn pointer_bytes(self) -> u8 {
self.pointer_width.bytes()
}
}
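// An illustrative sketch, not part of the original commit: constructing a
// hypothetical 64-bit `TargetFrontendConfig` and checking the derived
// pointer properties. Assumes only the public items above.
#[cfg(test)]
mod frontend_config_sketch {
    use super::{CallConv, TargetFrontendConfig};
    use crate::ir;
    use target_lexicon::PointerWidth;
    #[test]
    fn pointer_type_of_64_bit_target() {
        let cfg = TargetFrontendConfig {
            default_call_conv: CallConv::SystemV,
            pointer_width: PointerWidth::U64,
        };
        assert_eq!(cfg.pointer_bits(), 64);
        assert_eq!(cfg.pointer_bytes(), 8);
        assert_eq!(cfg.pointer_type(), ir::types::I64);
    }
}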
/// Methods that are specialized to a target ISA. Also requires a `Display` implementation that
/// shows the shared flags, as well as any ISA-specific flags.
pub trait TargetIsa: fmt::Display + Sync {
/// Get the name of this ISA.
fn name(&self) -> &'static str;
/// Get the target triple that was used to make this trait object.
fn triple(&self) -> &Triple;
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Get the default calling convention of this target.
fn default_call_conv(&self) -> CallConv {
CallConv::triple_default(self.triple())
}
/// Get the pointer type of this ISA.
fn pointer_type(&self) -> ir::Type {
ir::Type::int(u16::from(self.pointer_bits())).unwrap()
}
/// Get the width of pointers on this ISA.
fn pointer_width(&self) -> PointerWidth {
self.triple().pointer_width().unwrap()
}
/// Get the width of pointers on this ISA, in units of bits.
fn pointer_bits(&self) -> u8 {
self.pointer_width().bits()
}
/// Get the width of pointers on this ISA, in units of bytes.
fn pointer_bytes(&self) -> u8 {
self.pointer_width().bytes()
}
/// Get the information needed by frontends producing Cranelift IR.
fn frontend_config(&self) -> TargetFrontendConfig {
TargetFrontendConfig {
default_call_conv: self.default_call_conv(),
pointer_width: self.pointer_width(),
}
}
/// Does the CPU implement scalar comparisons using a CPU flags register?
fn uses_cpu_flags(&self) -> bool {
false
}
/// Does the CPU implement multi-register addressing?
fn uses_complex_addresses(&self) -> bool {
false
}
/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;
/// Returns an iterator over legal encodings for the instruction.
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a>;
/// Encode an instruction after determining it is legal.
///
/// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
/// Otherwise, return a `Legalize` action.
///
/// This is also the main entry point for determining if an instruction is legal.
fn encode(
&self,
func: &ir::Function,
inst: &ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Result<Encoding, Legalize> {
let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
iter.next().ok_or_else(|| iter.legalize())
}
/// Get a data structure describing the instruction encodings in this ISA.
fn encoding_info(&self) -> EncInfo;
/// Legalize a function signature.
///
/// This is used to legalize both the signature of the function being compiled and any called
/// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
/// arguments and return values.
///
/// Arguments with types that are not supported by the ABI can be expanded into multiple
/// arguments:
///
/// - Integer types that are too large to fit in a register can be broken into multiple
/// arguments of a smaller integer type.
/// - Floating point types can be bit-cast to an integer type of the same size, and possibly
///   broken into smaller integer types.
/// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
///
/// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
///
/// When this function is called to legalize the signature of the function currently being
/// compiled, `current` is true. The legalized signature can then also contain special purpose
/// arguments and return values such as:
///
/// - A `link` argument representing the link register on RISC architectures that don't push
/// the return address on the stack.
/// - A `link` return value which will receive the value that was passed to the `link`
/// argument.
/// - An `sret` argument can be added if one wasn't present already. This is necessary if the
/// signature returns more values than registers are available for returning values.
/// - An `sret` return value can be added if the ABI requires a function to return its `sret`
/// argument in a register.
///
/// Arguments and return values for the caller's frame pointer and other callee-saved registers
/// should not be added by this function. These arguments are not added until after register
/// allocation.
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool);
/// Get the register class that should be used to represent an ABI argument or return value of
/// type `ty`. This should be the top-level register class that contains the argument
/// registers.
///
/// This function can assume that it will only be asked to provide register classes for types
/// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries.
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass;
/// Get the set of allocatable registers that can be used when compiling `func`.
///
/// This set excludes reserved registers like the stack pointer and other special-purpose
/// registers.
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;
/// Compute the stack layout and insert prologue and epilogue code into `func`.
///
/// Return an error if the stack frame is too large.
fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
let _tt = timing::prologue_epilogue();
// This default implementation is unlikely to be good enough.
use crate::ir::stackslot::{StackOffset, StackSize};
use crate::stack_layout::layout_stack;
let word_size = StackSize::from(self.pointer_bytes());
// Account for the SpiderMonkey standard prologue pushes.
if func.signature.call_conv == CallConv::Baldrdash {
let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size;
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
}
layout_stack(&mut func.stack_slots, word_size)?;
Ok(())
}
/// Emit binary machine code for a single instruction into the `sink` trait object.
///
/// Note that this will call `put*` methods on the `sink` trait object via its vtable which
/// is not the fastest way of emitting code.
///
/// This function is under the "testing_hooks" feature, and is only suitable for use by
/// test harnesses. It increases code size, and is inefficient.
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut binemit::CodeSink,
);
/// Emit a whole function into memory.
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink);
}

View File

@@ -0,0 +1,325 @@
//! Data structures describing the registers in an ISA.
use crate::entity::EntityRef;
use core::fmt;
/// Register units are the smallest units of register allocation.
///
/// Normally there is a 1-1 correspondence between registers and register units, but when an ISA
/// has aliasing registers, the aliasing can be modeled with registers that cover multiple
/// register units.
///
/// The register allocator will enforce that each register unit only gets used for one thing.
pub type RegUnit = u16;
/// A bit mask indexed by register units.
///
/// The size of this type is determined by the target ISA that has the most register units defined.
/// Currently that is arm32, which has 64+16 units.
///
/// This type should be coordinated with meta-python/cdsl/registers.py.
pub type RegUnitMask = [u32; 3];
/// A bit mask indexed by register classes.
///
/// The size of this type is determined by the ISA with the most register classes.
///
/// This type should be coordinated with meta-python/cdsl/isa.py.
pub type RegClassMask = u32;
/// Guaranteed maximum number of top-level register classes with pressure tracking in any ISA.
///
/// This can be increased, but should be coordinated with meta-python/cdsl/isa.py.
pub const MAX_TRACKED_TOPRCS: usize = 4;
/// The register units in a target ISA are divided into disjoint register banks. Each bank covers a
/// contiguous range of register units.
///
/// The `RegBank` struct provides a static description of a register bank.
pub struct RegBank {
/// The name of this register bank as defined in the ISA's `registers.py` file.
pub name: &'static str,
/// The first register unit in this bank.
pub first_unit: RegUnit,
/// The total number of register units in this bank.
pub units: RegUnit,
/// Array of specially named register units. This array can be shorter than the number of units
/// in the bank.
pub names: &'static [&'static str],
/// Name prefix to use for those register units in the bank not covered by the `names` array.
/// The remaining register units will be named this prefix followed by their decimal offset in
/// the bank. So with a prefix `r`, registers will be named `r8`, `r9`, ...
pub prefix: &'static str,
/// Index of the first top-level register class in this bank.
pub first_toprc: usize,
/// Number of top-level register classes in this bank.
///
/// The top-level register classes in a bank are guaranteed to be numbered sequentially from
/// `first_toprc`, and all top-level register classes across banks come before any sub-classes.
pub num_toprcs: usize,
/// Is register pressure tracking enabled for this bank?
pub pressure_tracking: bool,
}
impl RegBank {
/// Does this bank contain `regunit`?
fn contains(&self, regunit: RegUnit) -> bool {
regunit >= self.first_unit && regunit - self.first_unit < self.units
}
/// Try to parse a regunit name. The name is not expected to begin with `%`.
fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
match self.names.iter().position(|&x| x == name) {
Some(offset) => {
// This is one of the special-cased names.
Some(offset as RegUnit)
}
None => {
// Try a regular prefixed name.
if name.starts_with(self.prefix) {
name[self.prefix.len()..].parse().ok()
} else {
None
}
}
}
.and_then(|offset| {
if offset < self.units {
Some(offset + self.first_unit)
} else {
None
}
})
}
/// Write `regunit` to `w`, assuming that it belongs to this bank.
/// All regunits are written with a `%` prefix.
fn write_regunit(&self, f: &mut fmt::Formatter, regunit: RegUnit) -> fmt::Result {
let offset = regunit - self.first_unit;
assert!(offset < self.units);
if (offset as usize) < self.names.len() {
write!(f, "%{}", self.names[offset as usize])
} else {
write!(f, "%{}{}", self.prefix, offset)
}
}
}
/// A register class reference.
///
/// All register classes are statically defined in tables generated from the meta descriptions.
pub type RegClass = &'static RegClassData;
/// Data about a register class.
///
/// A register class represents a subset of the registers in a bank. It describes the set of
/// permitted registers for a register operand in a given encoding of an instruction.
///
/// A register class can be a subset of another register class. The top-level register classes are
/// disjoint.
pub struct RegClassData {
/// The name of the register class.
pub name: &'static str,
/// The index of this class in the ISA's RegInfo description.
pub index: u8,
/// How many register units to allocate per register.
pub width: u8,
/// Index of the register bank this class belongs to.
pub bank: u8,
/// Index of the top-level register class that contains this one.
pub toprc: u8,
/// The first register unit in this class.
pub first: RegUnit,
/// Bit-mask of sub-classes of this register class, including itself.
///
/// Bits correspond to RC indexes.
pub subclasses: RegClassMask,
/// Mask of register units in the class. If `width > 1`, the mask only has a bit set for the
/// first register unit in each allocatable register.
pub mask: RegUnitMask,
/// The global `RegInfo` instance containing this register class.
pub info: &'static RegInfo,
}
impl RegClassData {
/// Get the register class index corresponding to the intersection of `self` and `other`.
///
/// This register class is guaranteed to exist if the register classes overlap. If the register
/// classes don't overlap, returns `None`.
pub fn intersect_index(&self, other: RegClass) -> Option<RegClassIndex> {
// Compute the set of common subclasses.
let mask = self.subclasses & other.subclasses;
if mask == 0 {
// No overlap.
None
} else {
// Register class indexes are topologically ordered, so the largest common subclass has
// the smallest index.
Some(RegClassIndex(mask.trailing_zeros() as u8))
}
}
/// Get the intersection of `self` and `other`.
pub fn intersect(&self, other: RegClass) -> Option<RegClass> {
self.intersect_index(other).map(|rci| self.info.rc(rci))
}
/// Returns true if `other` is a subclass of this register class.
/// A register class is considered to be a subclass of itself.
pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
self.subclasses & (1 << other.into().0) != 0
}
/// Get the top-level register class containing this class.
pub fn toprc(&self) -> RegClass {
self.info.rc(RegClassIndex(self.toprc))
}
/// Get a specific register unit in this class.
pub fn unit(&self, offset: usize) -> RegUnit {
let uoffset = offset * usize::from(self.width);
self.first + uoffset as RegUnit
}
/// Does this register class contain `regunit`?
pub fn contains(&self, regunit: RegUnit) -> bool {
self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32)) != 0
}
}
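// An illustrative sketch, not part of the original commit: the subclass-mask
// arithmetic used by `intersect_index` above, with hypothetical masks rather
// than any real ISA's tables.
#[cfg(test)]
mod intersect_sketch {
    #[test]
    fn lowest_common_subclass() {
        // Hypothetical: class A has subclasses {0, 2, 3}; class B has {1, 2, 3}.
        let a: u32 = 0b1101;
        let b: u32 = 0b1110;
        let common = a & b; // {2, 3}
        // Indexes are topologically ordered, so the largest common subclass
        // is the lowest set bit.
        assert_eq!(common.trailing_zeros(), 2);
    }
}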
impl fmt::Display for RegClassData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(self.name)
}
}
impl fmt::Debug for RegClassData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(self.name)
}
}
/// Within an ISA, register classes are uniquely identified by their index.
impl PartialEq for RegClassData {
fn eq(&self, other: &Self) -> bool {
self.index == other.index
}
}
/// A small reference to a register class.
///
/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method
/// can be used to get the real register class reference back.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RegClassIndex(u8);
impl EntityRef for RegClassIndex {
fn new(idx: usize) -> Self {
RegClassIndex(idx as u8)
}
fn index(self) -> usize {
usize::from(self.0)
}
}
impl From<RegClass> for RegClassIndex {
fn from(rc: RegClass) -> Self {
RegClassIndex(rc.index)
}
}
impl fmt::Display for RegClassIndex {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "rci{}", self.0)
}
}
/// Test whether two registers overlap.
///
/// A register is identified as a `(RegClass, RegUnit)` pair. The register class is needed to
/// determine the width (in regunits) of the register.
pub fn regs_overlap(rc1: RegClass, reg1: RegUnit, rc2: RegClass, reg2: RegUnit) -> bool {
let end1 = reg1 + RegUnit::from(rc1.width);
let end2 = reg2 + RegUnit::from(rc2.width);
!(end1 <= reg2 || end2 <= reg1)
}
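// An illustrative sketch, not part of the original commit: the half-open
// interval logic behind `regs_overlap`, restated over plain unit ranges with
// hypothetical widths.
#[cfg(test)]
mod overlap_sketch {
    fn ranges_overlap(start1: u16, width1: u16, start2: u16, width2: u16) -> bool {
        let end1 = start1 + width1;
        let end2 = start2 + width2;
        !(end1 <= start2 || end2 <= start1)
    }
    #[test]
    fn adjacent_and_overlapping() {
        // Width-2 registers at units 0 and 1 share unit 1.
        assert!(ranges_overlap(0, 2, 1, 2));
        // Width-2 registers at units 0 and 2 are adjacent, not overlapping.
        assert!(!ranges_overlap(0, 2, 2, 2));
    }
}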
/// Information about the registers in an ISA.
///
/// The `RegInfo` data structure collects all relevant static information about the registers in an
/// ISA.
#[derive(Clone)]
pub struct RegInfo {
/// All register banks, ordered by their `first_unit`. The register banks are disjoint, but
/// there may be holes of unused register unit numbers between banks due to alignment.
pub banks: &'static [RegBank],
/// All register classes ordered topologically so a sub-class always follows its parent.
pub classes: &'static [RegClass],
}
impl RegInfo {
/// Get the register bank holding `regunit`.
pub fn bank_containing_regunit(&self, regunit: RegUnit) -> Option<&RegBank> {
// We could do a binary search, but most ISAs have only two register banks...
self.banks.iter().find(|b| b.contains(regunit))
}
/// Try to parse a regunit name. The name is not expected to begin with `%`.
pub fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
self.banks
.iter()
.filter_map(|b| b.parse_regunit(name))
.next()
}
/// Make a temporary object that can display a register unit.
pub fn display_regunit(&self, regunit: RegUnit) -> DisplayRegUnit {
DisplayRegUnit {
regunit,
reginfo: self,
}
}
/// Get the register class corresponding to `idx`.
pub fn rc(&self, idx: RegClassIndex) -> RegClass {
self.classes[idx.index()]
}
/// Get the top-level register class containing the `idx` class.
pub fn toprc(&self, idx: RegClassIndex) -> RegClass {
self.classes[self.rc(idx).toprc as usize]
}
}
/// Temporary object that holds enough information to print a register unit.
pub struct DisplayRegUnit<'a> {
regunit: RegUnit,
reginfo: &'a RegInfo,
}
impl<'a> fmt::Display for DisplayRegUnit<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.reginfo.bank_containing_regunit(self.regunit) {
Some(b) => b.write_regunit(f, self.regunit),
None => write!(f, "%INVALID{}", self.regunit),
}
}
}

View File

@@ -0,0 +1,144 @@
//! RISC-V ABI implementation.
//!
//! This module implements the RISC-V calling convention through the primary `legalize_signature()`
//! entry point.
//!
//! This doesn't support the soft-float ABI at the moment.
use super::registers::{FPR, GPR};
use super::settings;
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use core::i32;
use target_lexicon::Triple;
struct Args {
pointer_bits: u8,
pointer_bytes: u8,
pointer_type: Type,
regs: u32,
reg_limit: u32,
offset: u32,
}
impl Args {
fn new(bits: u8, enable_e: bool) -> Self {
Self {
pointer_bits: bits,
pointer_bytes: bits / 8,
pointer_type: Type::int(u16::from(bits)).unwrap(),
regs: 0,
reg_limit: if enable_e { 6 } else { 8 },
offset: 0,
}
}
}
impl ArgAssigner for Args {
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
fn align(value: u32, to: u32) -> u32 {
(value + to - 1) & !(to - 1)
}
let ty = arg.value_type;
// Check for a legal type.
// RISC-V doesn't have SIMD at all, so break all vectors down.
if ty.is_vector() {
return ValueConversion::VectorSplit.into();
}
// Large integers and booleans are broken down to fit in a register.
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
// Align registers and stack to a multiple of two pointers.
self.regs = align(self.regs, 2);
self.offset = align(self.offset, 2 * u32::from(self.pointer_bytes));
return ValueConversion::IntSplit.into();
}
// Small integers are extended to the size of a pointer register.
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
match arg.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
}
}
if self.regs < self.reg_limit {
// Assign to a register.
let reg = if ty.is_float() {
FPR.unit(10 + self.regs as usize)
} else {
GPR.unit(10 + self.regs as usize)
};
self.regs += 1;
ArgumentLoc::Reg(reg).into()
} else {
// Assign a stack location.
let loc = ArgumentLoc::Stack(self.offset as i32);
self.offset += u32::from(self.pointer_bytes);
debug_assert!(self.offset <= i32::MAX as u32);
loc.into()
}
}
}
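// An illustrative sketch, not part of the original commit: the `align`
// helper inside `assign` rounds a value up to a power-of-two boundary, which
// keeps split integers on even register pairs and double-pointer stack
// offsets. Hypothetical values only.
#[cfg(test)]
mod align_sketch {
    fn align(value: u32, to: u32) -> u32 {
        (value + to - 1) & !(to - 1)
    }
    #[test]
    fn rounds_up_to_power_of_two() {
        assert_eq!(align(0, 2), 0);
        assert_eq!(align(1, 2), 2);
        assert_eq!(align(5, 8), 8); // stack offset 5 rounded to an 8-byte boundary
        assert_eq!(align(16, 8), 16); // already aligned values are unchanged
    }
}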
/// Legalize `sig` for RISC-V.
pub fn legalize_signature(
sig: &mut ir::Signature,
triple: &Triple,
isa_flags: &settings::Flags,
current: bool,
) {
let bits = triple.pointer_width().unwrap().bits();
let mut args = Args::new(bits, isa_flags.enable_e());
legalize_args(&mut sig.params, &mut args);
let mut rets = Args::new(bits, isa_flags.enable_e());
legalize_args(&mut sig.returns, &mut rets);
if current {
let ptr = Type::int(u16::from(bits)).unwrap();
// Add the link register as an argument and return value.
//
// The `jalr` instruction implementing a return can technically accept the return address
// in any register, but a micro-architecture with a return address predictor will only
// recognize it as a return if the address is in `x1`.
let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1));
sig.params.push(link);
sig.returns.push(link);
}
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: Type) -> RegClass {
if ty.is_float() {
FPR
} else {
GPR
}
}
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
let mut regs = RegisterSet::new();
regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
// %x1 is the link register which is available for allocation.
regs.take(GPR, GPR.unit(2)); // Stack pointer.
regs.take(GPR, GPR.unit(3)); // Global pointer.
regs.take(GPR, GPR.unit(4)); // Thread pointer.
// TODO: %x8 is the frame pointer. Reserve it?
// Remove %x16 and up for RV32E.
if isa_flags.enable_e() {
for u in 16..32 {
regs.take(GPR, GPR.unit(u));
}
}
regs
}

View File

@@ -0,0 +1,182 @@
//! Emitting binary RISC-V machine code.
use crate::binemit::{bad_encoding, CodeSink, Reloc};
use crate::ir::{Function, Inst, InstructionData};
use crate::isa::{RegUnit, StackBaseMask, StackRef};
use crate::predicates::is_signed_int;
use crate::regalloc::RegDiversions;
use core::u32;
include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs"));
/// R-type instructions.
///
/// funct7[31:25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
fn put_r<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let funct7 = (bits >> 8) & 0x7f;
let rs1 = u32::from(rs1) & 0x1f;
let rs2 = u32::from(rs2) & 0x1f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= funct3 << 12;
i |= rs1 << 15;
i |= rs2 << 20;
i |= funct7 << 25;
sink.put4(i);
}
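// An illustrative sketch, not part of the original commit: the same field
// packing as `put_r`, restated as standalone arithmetic and checked against
// the well-known encoding of `add x1, x2, x3` (0x003100b3).
#[cfg(test)]
mod rtype_sketch {
    #[test]
    fn pack_add() {
        let (opcode5, funct3, funct7) = (0b01100u32, 0u32, 0u32); // OP major opcode
        let (rd, rs1, rs2) = (1u32, 2u32, 3u32);
        let mut i = 0x3; // bits 0-1 are always `11` for 32-bit instructions
        i |= opcode5 << 2;
        i |= rd << 7;
        i |= funct3 << 12;
        i |= rs1 << 15;
        i |= rs2 << 20;
        i |= funct7 << 25;
        assert_eq!(i, 0x0031_00b3);
    }
}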
/// R-type instructions with a shift amount instead of rs2.
///
/// funct7[31:25] shamt[25:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
///
/// Both funct7 and shamt contribute to bit 25. In RV64, shamt uses it for shifts > 31.
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
fn put_rshamt<CS: CodeSink + ?Sized>(
bits: u16,
rs1: RegUnit,
shamt: i64,
rd: RegUnit,
sink: &mut CS,
) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let funct7 = (bits >> 8) & 0x7f;
let rs1 = u32::from(rs1) & 0x1f;
let shamt = shamt as u32 & 0x3f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= funct3 << 12;
i |= rs1 << 15;
i |= shamt << 20;
i |= funct7 << 25;
sink.put4(i);
}
/// I-type instructions.
///
/// imm[31:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let rs1 = u32::from(rs1) & 0x1f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= funct3 << 12;
i |= rs1 << 15;
i |= (imm << 20) as u32;
sink.put4(i);
}
/// U-type instructions.
///
/// imm[31:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= imm as u32 & 0xfffff000;
sink.put4(i);
}
/// SB-type branch instructions.
///
/// imm[31:25] rs2[24:20] rs1[19:15] funct3[14:12] imm[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let rs1 = u32::from(rs1) & 0x1f;
let rs2 = u32::from(rs2) & 0x1f;
debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
let imm = imm as u32;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= funct3 << 12;
i |= rs1 << 15;
i |= rs2 << 20;
// The displacement bits are scattered across the instruction word.
i |= ((imm >> 11) & 0x1) << 7;
i |= ((imm >> 1) & 0xf) << 8;
i |= ((imm >> 5) & 0x3f) << 25;
i |= ((imm >> 12) & 0x1) << 31;
sink.put4(i);
}
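// An illustrative sketch, not part of the original commit: the displacement
// scatter performed by `put_sb`, restated as standalone arithmetic and
// checked against the well-known encoding of `beq x0, x0, 8` (0x00000463).
#[cfg(test)]
mod sbtype_sketch {
    #[test]
    fn scatter_branch_displacement() {
        let imm = 8u32;
        let opcode5 = 0b11000u32; // BRANCH major opcode, bits [6:2]
        let mut i = 0x3;
        i |= opcode5 << 2;
        i |= ((imm >> 11) & 0x1) << 7;
        i |= ((imm >> 1) & 0xf) << 8;
        i |= ((imm >> 5) & 0x3f) << 25;
        i |= ((imm >> 12) & 0x1) << 31;
        assert_eq!(i, 0x0000_0463);
    }
}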
/// UJ-type jump instructions.
///
/// imm[31:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2]`
fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let rd = u32::from(rd) & 0x1f;
debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
let imm = imm as u32;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
// The displacement bits are scattered across the instruction word.
i |= imm & 0xff000;
i |= ((imm >> 11) & 0x1) << 20;
i |= ((imm >> 1) & 0x3ff) << 21;
i |= ((imm >> 20) & 0x1) << 31;
sink.put4(i);
}

View File

@@ -0,0 +1,17 @@
//! Encoding tables for RISC-V.
use super::registers::*;
use crate::ir;
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::{base_size, RecipeSizing};
// Include the generated encoding tables:
// - `LEVEL1_RV32`
// - `LEVEL1_RV64`
// - `LEVEL2`
// - `ENCLISTS`
// - `INFO`
include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs"));

View File

@@ -0,0 +1,281 @@
//! RISC-V Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use core::fmt;
use std::boxed::Box;
use target_lexicon::{PointerWidth, Triple};
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
/// Get an ISA builder for creating RISC-V targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
let level1 = match triple.pointer_width().unwrap() {
PointerWidth::U16 => panic!("16-bit RISC-V unrecognized"),
PointerWidth::U32 => &enc_tables::LEVEL1_RV32[..],
PointerWidth::U64 => &enc_tables::LEVEL1_RV64[..],
};
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
cpumode: level1,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"riscv"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
self.cpumode,
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.triple, &self.isa_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func, &self.isa_flags)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
}
#[cfg(test)]
mod tests {
use crate::ir::{immediates, types};
use crate::ir::{Function, InstructionData, Opcode};
use crate::isa;
use crate::settings::{self, Configurable};
use core::str::FromStr;
use std::string::{String, ToString};
use target_lexicon::triple;
fn encstr(isa: &isa::TargetIsa, enc: Result<isa::Encoding, isa::Legalize>) -> String {
match enc {
Ok(e) => isa.encoding_info().display(e).to_string(),
Err(_) => "no encoding".to_string(),
}
}
#[test]
fn test_64bitenc() {
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
let isa = isa::lookup(triple!("riscv64"))
.unwrap()
.finish(shared_flags);
let mut func = Function::new();
let ebb = func.dfg.make_ebb();
let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
let arg32 = func.dfg.append_ebb_param(ebb, types::I32);
// Try to encode iadd_imm.i64 v1, -10.
let inst64 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10),
};
// ADDI is I/0b00100
assert_eq!(
encstr(&*isa, isa.encode(&func, &inst64, types::I64)),
"Ii#04"
);
// Try to encode iadd_imm.i64 v1, -10000.
let inst64_large = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10000),
};
// Immediate is out of range for ADDI.
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
// Create an iadd_imm.i32 which is encodable in RV64.
let inst32 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg32,
imm: immediates::Imm64::new(10),
};
// ADDIW is I/0b00110
assert_eq!(
encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
"Ii#06"
);
}
// Same as above, but for RV32.
#[test]
fn test_32bitenc() {
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
let isa = isa::lookup(triple!("riscv32"))
.unwrap()
.finish(shared_flags);
let mut func = Function::new();
let ebb = func.dfg.make_ebb();
let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
let arg32 = func.dfg.append_ebb_param(ebb, types::I32);
// Try to encode iadd_imm.i64 v1, -10.
let inst64 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10),
};
// In 32-bit mode, an i64 add should be narrowed.
assert!(isa.encode(&func, &inst64, types::I64).is_err());
// Try to encode iadd_imm.i64 v1, -10000.
let inst64_large = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10000),
};
// In 32-bit mode, an i64 add should be narrowed.
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
// Create an iadd_imm.i32 which is encodable in RV32.
let inst32 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg32,
imm: immediates::Imm64::new(10),
};
// ADDI is I/0b00100
assert_eq!(
encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
"Ii#04"
);
// Create an imul.i32 which is encodable in RV32, but only when use_m is true.
let mul32 = InstructionData::Binary {
opcode: Opcode::Imul,
args: [arg32, arg32],
};
assert!(isa.encode(&func, &mul32, types::I32).is_err());
}
#[test]
fn test_rv32m() {
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
// Set the supports_m setting, which in turn enables the use_m predicate that unlocks
// encodings for imul.
let mut isa_builder = isa::lookup(triple!("riscv32")).unwrap();
isa_builder.enable("supports_m").unwrap();
let isa = isa_builder.finish(shared_flags);
let mut func = Function::new();
let ebb = func.dfg.make_ebb();
let arg32 = func.dfg.append_ebb_param(ebb, types::I32);
// Create an imul.i32 which is encodable in RV32M.
let mul32 = InstructionData::Binary {
opcode: Opcode::Imul,
args: [arg32, arg32],
};
assert_eq!(
encstr(&*isa, isa.encode(&func, &mul32, types::I32)),
"R#10c"
);
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}

View File

@@ -0,0 +1,50 @@
//! RISC-V register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs"));
#[cfg(test)]
mod tests {
use super::{FPR, GPR, INFO};
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("x0"), Some(0));
assert_eq!(INFO.parse_regunit("x31"), Some(31));
assert_eq!(INFO.parse_regunit("f0"), Some(32));
assert_eq!(INFO.parse_regunit("f31"), Some(63));
assert_eq!(INFO.parse_regunit("x32"), None);
assert_eq!(INFO.parse_regunit("f32"), None);
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%x0");
assert_eq!(uname(1), "%x1");
assert_eq!(uname(31), "%x31");
assert_eq!(uname(32), "%f0");
assert_eq!(uname(33), "%f1");
assert_eq!(uname(63), "%f31");
assert_eq!(uname(64), "%INVALID64");
}
#[test]
fn classes() {
assert!(GPR.contains(GPR.unit(0)));
assert!(GPR.contains(GPR.unit(31)));
assert!(!FPR.contains(GPR.unit(0)));
assert!(!FPR.contains(GPR.unit(31)));
assert!(!GPR.contains(FPR.unit(0)));
assert!(!GPR.contains(FPR.unit(31)));
assert!(FPR.contains(FPR.unit(0)));
assert!(FPR.contains(FPR.unit(31)));
}
}

View File

@@ -0,0 +1,54 @@
//! RISC-V Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/riscv/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));
#[cfg(test)]
mod tests {
use super::{builder, Flags};
use crate::settings::{self, Configurable};
use std::string::ToString;
#[test]
fn display_default() {
let shared = settings::Flags::new(settings::builder());
let b = builder();
let f = Flags::new(&shared, b);
assert_eq!(
f.to_string(),
"[riscv]\n\
supports_m = false\n\
supports_a = false\n\
supports_f = false\n\
supports_d = false\n\
enable_m = true\n\
enable_e = false\n"
);
// Predicates are not part of the Display output.
assert_eq!(f.full_float(), false);
}
#[test]
fn predicates() {
let shared = settings::Flags::new(settings::builder());
let mut b = builder();
b.enable("supports_f").unwrap();
b.enable("supports_d").unwrap();
let f = Flags::new(&shared, b);
assert_eq!(f.full_float(), true);
let mut sb = settings::builder();
sb.set("enable_simd", "false").unwrap();
let shared = settings::Flags::new(sb);
let mut b = builder();
b.enable("supports_f").unwrap();
b.enable("supports_d").unwrap();
let f = Flags::new(&shared, b);
assert_eq!(f.full_float(), false);
}
}

View File

@@ -0,0 +1,94 @@
//! Low-level details of stack accesses.
//!
//! The `ir::StackSlots` type deals with stack slots and stack frame layout. The `StackRef` type
//! defined in this module expresses the low-level details of accessing a stack slot from an
//! encoded instruction.
use crate::ir::stackslot::{StackOffset, StackSlotKind, StackSlots};
use crate::ir::StackSlot;
/// A method for referencing a stack slot in the current stack frame.
///
/// Stack slots are addressed with a constant offset from a base register. The base can be the
/// stack pointer, the frame pointer, or (in the future) a zone register pointing to an inner zone
/// of a large stack frame.
#[derive(Clone, Copy, Debug)]
pub struct StackRef {
/// The base register to use for addressing.
pub base: StackBase,
/// Immediate offset from the base register to the first byte of the stack slot.
pub offset: StackOffset,
}
impl StackRef {
/// Get a reference to the stack slot `ss` using one of the base pointers in `mask`.
pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option<Self> {
// Try an SP-relative reference.
if mask.contains(StackBase::SP) {
return Some(Self::sp(ss, frame));
}
// No reference possible with this mask.
None
}
/// Get a reference to `ss` using the stack pointer as a base.
pub fn sp(ss: StackSlot, frame: &StackSlots) -> Self {
let size = frame
.frame_size
.expect("Stack layout must be computed before referencing stack slots");
let slot = &frame[ss];
let offset = if slot.kind == StackSlotKind::OutgoingArg {
// Outgoing argument slots have offsets relative to our stack pointer.
slot.offset.unwrap()
} else {
// All other slots have offsets relative to our caller's stack frame.
// Offset where SP is pointing. (All ISAs have stacks growing downwards.)
let sp_offset = -(size as StackOffset);
slot.offset.unwrap() - sp_offset
};
Self {
base: StackBase::SP,
offset,
}
}
}
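// An illustrative sketch, not part of the original commit: the offset
// arithmetic in `StackRef::sp` for a non-outgoing slot, with a hypothetical
// 32-byte frame instead of a real `StackSlots` instance.
#[cfg(test)]
mod stackref_sketch {
    #[test]
    fn sp_relative_offset() {
        let frame_size: i32 = 32; // as computed by the stack layout pass
        let slot_offset: i32 = -16; // relative to the caller's frame base
        let sp_offset = -frame_size; // where SP points after the prologue
        assert_eq!(slot_offset - sp_offset, 16); // the slot is addressed as SP+16
    }
}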
/// Generic base register for referencing stack slots.
///
/// Most ISAs have a stack pointer and an optional frame pointer, so provide generic names for
/// those two base pointers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackBase {
/// Use the stack pointer.
SP = 0,
/// Use the frame pointer (if one is present).
FP = 1,
/// Use an explicit zone pointer in a general-purpose register.
///
/// This feature is not yet implemented.
Zone = 2,
}
/// Bit mask of supported stack bases.
///
/// Many instruction encodings can use different base registers while others only work with the
/// stack pointer, say. A `StackBaseMask` is a bit mask of supported stack bases for a given
/// instruction encoding.
///
/// This behaves like a set of `StackBase` variants.
///
/// The internal representation as a `u8` is public because stack base masks are used in constant
/// tables generated from the Python encoding definitions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct StackBaseMask(pub u8);
impl StackBaseMask {
/// Check if this mask contains the `base` variant.
pub fn contains(self, base: StackBase) -> bool {
self.0 & (1 << base as usize) != 0
}
}
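// An illustrative sketch, not part of the original commit: membership tests
// on a hypothetical mask permitting SP and FP but not Zone.
#[cfg(test)]
mod stack_base_mask_sketch {
    use super::{StackBase, StackBaseMask};
    #[test]
    fn mask_membership() {
        let mask = StackBaseMask(0b011);
        assert!(mask.contains(StackBase::SP));
        assert!(mask.contains(StackBase::FP));
        assert!(!mask.contains(StackBase::Zone));
    }
}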

View File

@@ -0,0 +1,579 @@
//! x86 ABI implementation.
use super::registers::{FPR, GPR, RU};
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use crate::cursor::{Cursor, CursorPosition, EncCursor};
use crate::ir;
use crate::ir::immediates::Imm64;
use crate::ir::stackslot::{StackOffset, StackSize};
use crate::ir::{
get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder,
ValueLoc,
};
use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa};
use crate::regalloc::RegisterSet;
use crate::result::CodegenResult;
use crate::stack_layout::layout_stack;
use core::i32;
use target_lexicon::{PointerWidth, Triple};
/// Argument registers for x86-64
static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
/// Return value registers.
static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];
/// Argument registers for x86-64, when using windows fastcall
static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9];
/// Return value registers for x86-64, when using windows fastcall
static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
struct Args {
pointer_bytes: u8,
pointer_bits: u8,
pointer_type: ir::Type,
gpr: &'static [RU],
gpr_used: usize,
fpr_limit: usize,
fpr_used: usize,
offset: u32,
call_conv: CallConv,
}
impl Args {
fn new(bits: u8, gpr: &'static [RU], fpr_limit: usize, call_conv: CallConv) -> Self {
let offset = if let CallConv::WindowsFastcall = call_conv {
// [1] "The caller is responsible for allocating space for parameters to the callee,
// and must always allocate sufficient space to store four register parameters"
32
} else {
0
};
Self {
pointer_bytes: bits / 8,
pointer_bits: bits,
pointer_type: ir::Type::int(u16::from(bits)).unwrap(),
gpr,
gpr_used: 0,
fpr_limit,
fpr_used: 0,
offset,
call_conv,
}
}
}
impl ArgAssigner for Args {
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
let ty = arg.value_type;
// Check for a legal type.
// We don't support SIMD yet, so break all vectors down.
if ty.is_vector() {
return ValueConversion::VectorSplit.into();
}
// Large integers and booleans are broken down to fit in a register.
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
return ValueConversion::IntSplit.into();
}
// Small integers are extended to the size of a pointer register.
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
match arg.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
}
}
// Handle special-purpose arguments.
if ty.is_int() && self.call_conv == CallConv::Baldrdash {
match arg.purpose {
// This is SpiderMonkey's `WasmTlsReg`.
ArgumentPurpose::VMContext => {
return ArgumentLoc::Reg(if self.pointer_bits == 64 {
RU::r14
} else {
RU::rsi
} as RegUnit)
.into();
}
// This is SpiderMonkey's `WasmTableCallSigReg`.
ArgumentPurpose::SignatureId => return ArgumentLoc::Reg(RU::r10 as RegUnit).into(),
_ => {}
}
}
// Try to use a GPR.
if !ty.is_float() && self.gpr_used < self.gpr.len() {
let reg = self.gpr[self.gpr_used] as RegUnit;
self.gpr_used += 1;
return ArgumentLoc::Reg(reg).into();
}
// Try to use an FPR.
if ty.is_float() && self.fpr_used < self.fpr_limit {
let reg = FPR.unit(self.fpr_used);
self.fpr_used += 1;
return ArgumentLoc::Reg(reg).into();
}
// Assign a stack location.
let loc = ArgumentLoc::Stack(self.offset as i32);
self.offset += u32::from(self.pointer_bytes);
debug_assert!(self.offset <= i32::MAX as u32);
loc.into()
}
}
/// Legalize `sig`.
pub fn legalize_signature(sig: &mut ir::Signature, triple: &Triple, _current: bool) {
let bits;
let mut args;
match triple.pointer_width().unwrap() {
PointerWidth::U16 => panic!(),
PointerWidth::U32 => {
bits = 32;
args = Args::new(bits, &[], 0, sig.call_conv);
}
PointerWidth::U64 => {
bits = 64;
args = if sig.call_conv == CallConv::WindowsFastcall {
Args::new(bits, &ARG_GPRS_WIN_FASTCALL_X64[..], 4, sig.call_conv)
} else {
Args::new(bits, &ARG_GPRS[..], 8, sig.call_conv)
};
}
}
legalize_args(&mut sig.params, &mut args);
let regs = if sig.call_conv == CallConv::WindowsFastcall {
&RET_GPRS_WIN_FASTCALL_X64[..]
} else {
&RET_GPRS[..]
};
let mut rets = Args::new(bits, regs, 2, sig.call_conv);
legalize_args(&mut sig.returns, &mut rets);
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() || ty.is_bool() {
GPR
} else {
FPR
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function, triple: &Triple) -> RegisterSet {
let mut regs = RegisterSet::new();
regs.take(GPR, RU::rsp as RegUnit);
regs.take(GPR, RU::rbp as RegUnit);
// 32-bit arch only has 8 registers.
if triple.pointer_width().unwrap() != PointerWidth::U64 {
for i in 8..16 {
regs.take(GPR, GPR.unit(i));
regs.take(FPR, FPR.unit(i));
}
}
regs
}
/// Get the set of callee-saved registers.
fn callee_saved_gprs(isa: &TargetIsa, call_conv: CallConv) -> &'static [RU] {
match isa.triple().pointer_width().unwrap() {
PointerWidth::U16 => panic!(),
PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi],
PointerWidth::U64 => {
if call_conv == CallConv::WindowsFastcall {
// "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15 are considered nonvolatile
// and must be saved and restored by a function that uses them."
// as per https://msdn.microsoft.com/en-us/library/6t169e9c.aspx
// RSP & RBP are not listed below, since they are restored automatically during
// a function call. If that wasn't the case, function calls (RET) would not work.
&[
RU::rbx,
RU::rdi,
RU::rsi,
RU::r12,
RU::r13,
RU::r14,
RU::r15,
]
} else {
&[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
}
}
}
}
/// Get the set of callee-saved registers that are used.
fn callee_saved_gprs_used(isa: &TargetIsa, func: &ir::Function) -> RegisterSet {
let mut all_callee_saved = RegisterSet::empty();
for reg in callee_saved_gprs(isa, func.signature.call_conv) {
all_callee_saved.free(GPR, *reg as RegUnit);
}
let mut used = RegisterSet::empty();
for value_loc in func.locations.values() {
// Note that `value_loc` here contains only a single unit of a potentially multi-unit
// register. We don't use registers that overlap each other in the x86 ISA, but in others
// we do. So this should not be blindly reused.
if let ValueLoc::Reg(ru) = *value_loc {
if !used.is_avail(GPR, ru) {
used.free(GPR, ru);
}
}
}
// regmove and regfill instructions may temporarily divert values into other registers,
// and these are not reflected in `func.locations`. Scan the function for such instructions
// and note which callee-saved registers they use.
//
// TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
// to avoid this step.
for ebb in &func.layout {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg[inst] {
ir::instructions::InstructionData::RegMove { dst, .. }
| ir::instructions::InstructionData::RegFill { dst, .. } => {
if !used.is_avail(GPR, dst) {
used.free(GPR, dst);
}
}
_ => (),
}
}
}
used.intersect(&all_callee_saved);
used
}
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
match func.signature.call_conv {
// For now, just translate fast and cold as system_v.
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
system_v_prologue_epilogue(func, isa)
}
CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa),
CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa),
CallConv::Probestack => unimplemented!("probestack calling convention"),
}
}
fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
debug_assert!(
!isa.flags().probestack_enabled(),
"baldrdash does not expect cranelift to emit stack probes"
);
// Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes.
let stack_align = 16;
let word_size = StackSize::from(isa.pointer_bytes());
let bytes = StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size;
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
layout_stack(&mut func.stack_slots, stack_align)?;
Ok(())
}
/// Implementation of the fastcall-based Win64 calling convention described at [1]
/// [1] https://msdn.microsoft.com/en-us/library/ms235286.aspx
fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
if isa.triple().pointer_width().unwrap() != PointerWidth::U64 {
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
}
// [1] "The primary exceptions are the stack pointer and malloc or alloca memory,
// which are aligned to 16 bytes in order to aid performance"
let stack_align = 16;
let word_size = isa.pointer_bytes() as usize;
let reg_type = isa.pointer_type();
let csrs = callee_saved_gprs_used(isa, func);
// [1] "Space is allocated on the call stack as a shadow store for callees to save"
// This shadow store contains the parameters which are passed through registers (ARG_GPRS)
// and is eventually used by the callee to save & restore the values of the arguments.
//
// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333
// "Although the x64 calling convention reserves spill space for parameters,
// you don't have to use them as such"
//
// The reserved stack area is composed of:
// return address + frame pointer + all callee-saved registers + shadow space
//
// Pushing the return address is an implicit function of the `call`
// instruction. Each of the others we will then push explicitly. Then we
// will adjust the stack pointer to make room for the rest of the required
// space for this frame.
const SHADOW_STORE_SIZE: i32 = 32;
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
// since cranelift does not support spill slots before incoming args
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::IncomingArg,
size: csr_stack_size as u32,
offset: Some(-(SHADOW_STORE_SIZE + csr_stack_size)),
});
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Add CSRs to function signature
let fp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::FramePointer,
RU::rbp as RegUnit,
);
func.signature.params.push(fp_arg);
func.signature.returns.push(fp_arg);
for csr in csrs.iter(GPR) {
let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg);
}
// Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
Ok(())
}
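// An illustrative sketch, not part of the original commit: the reserved-area
// arithmetic used above, with a hypothetical count of three callee-saved
// registers on x86-64.
#[cfg(test)]
mod fastcall_frame_sketch {
    #[test]
    fn reserved_region_size() {
        const SHADOW_STORE_SIZE: i32 = 32;
        let word_size = 8i32; // x86-64 pointer size
        let num_csrs = 3i32; // hypothetical
        // Return address + frame pointer + CSRs.
        let csr_stack_size = (num_csrs + 2) * word_size;
        assert_eq!(csr_stack_size, 40);
        // The incoming-arg slot is placed below both the CSR area and the
        // 32-byte fastcall shadow store.
        assert_eq!(-(SHADOW_STORE_SIZE + csr_stack_size), -72);
    }
}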
/// Insert a System V-compatible prologue and epilogue.
fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
// The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
// newer versions use a 16-byte aligned stack pointer.
let stack_align = 16;
let pointer_width = isa.triple().pointer_width().unwrap();
let word_size = pointer_width.bytes() as usize;
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
let csrs = callee_saved_gprs_used(isa, func);
// The reserved stack area is composed of:
// return address + frame pointer + all callee-saved registers
//
// Pushing the return address is an implicit function of the `call`
// instruction. Each of the others we will then push explicitly. Then we
// will adjust the stack pointer to make room for the rest of the required
// space for this frame.
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::IncomingArg,
size: csr_stack_size as u32,
offset: Some(-csr_stack_size),
});
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Add CSRs to function signature
let fp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::FramePointer,
RU::rbp as RegUnit,
);
func.signature.params.push(fp_arg);
func.signature.returns.push(fp_arg);
for csr in csrs.iter(GPR) {
let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg);
}
// Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
Ok(())
}
/// Insert the prologue for a given function.
/// This is used by common calling conventions such as System V.
fn insert_common_prologue(
pos: &mut EncCursor,
stack_size: i64,
reg_type: ir::types::Type,
csrs: &RegisterSet,
isa: &TargetIsa,
) {
if stack_size > 0 {
// Check if there is a special stack limit parameter. If so, insert a stack check.
if let Some(stack_limit_arg) = pos.func.special_param(ArgumentPurpose::StackLimit) {
// The total stack size covers all stack space used by the function, including the
// pushed CSRs and the frame pointer.
// The return address, implicitly pushed by an x86 `call` instruction, must also be
// accounted for.
// TODO: Check if the function body actually contains a `call` instruction.
let word_size = isa.pointer_bytes();
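            // One word each for the return address and frame pointer, plus one per pushed CSR.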
let total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64;
insert_stack_check(pos, total_stack_size, stack_limit_arg);
}
}
// Append param to entry EBB
let ebb = pos.current_ebb().expect("missing ebb under cursor");
let fp = pos.func.dfg.append_ebb_param(ebb, reg_type);
pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.ins().x86_push(fp);
pos.ins()
.copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit);
for reg in csrs.iter(GPR) {
// Append param to entry EBB
let csr_arg = pos.func.dfg.append_ebb_param(ebb, reg_type);
// Assign it a location
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
// Remember it so we can push it momentarily
pos.ins().x86_push(csr_arg);
}
// Allocate stack frame storage.
if stack_size > 0 {
if isa.flags().probestack_enabled()
&& stack_size > (1 << isa.flags().probestack_size_log2())
{
// Emit a stack probe.
let rax = RU::rax as RegUnit;
let rax_val = ir::ValueLoc::Reg(rax);
// The probestack function expects its input in %rax.
let arg = pos.ins().iconst(reg_type, stack_size);
pos.func.locations[arg] = rax_val;
// Call the probestack function.
let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);
// Make the call.
let call = if !isa.flags().is_pic()
&& isa.triple().pointer_width().unwrap() == PointerWidth::U64
&& !pos.func.dfg.ext_funcs[callee].colocated
{
// 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
// Use r11 as it may be clobbered under all supported calling conventions.
let r11 = RU::r11 as RegUnit;
let sig = pos.func.dfg.ext_funcs[callee].signature;
let addr = pos.ins().func_addr(reg_type, callee);
pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
pos.ins().call_indirect(sig, addr, &[arg])
} else {
// Otherwise just do a normal call.
pos.ins().call(callee, &[arg])
};
// If the probestack function doesn't adjust sp, do it ourselves.
if !isa.flags().probestack_func_adjusts_sp() {
let result = pos.func.dfg.inst_results(call)[0];
pos.func.locations[result] = rax_val;
pos.ins().adjust_sp_down(result);
}
} else {
// Simply decrement the stack pointer.
pos.ins().adjust_sp_down_imm(Imm64::new(stack_size));
}
}
}
/// Insert a check that generates a trap if the stack pointer goes
/// below a value in `stack_limit_arg`.
fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) {
use crate::ir::condcodes::IntCC;
    // Copy `stack_limit_arg` into %rax and use it to compute the SP threshold.
let stack_limit_copy = pos.ins().copy(stack_limit_arg);
pos.func.locations[stack_limit_copy] = ir::ValueLoc::Reg(RU::rax as RegUnit);
let sp_threshold = pos.ins().iadd_imm(stack_limit_copy, stack_size);
pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit);
    // If the stack pointer is currently at or below the SP threshold, then after opening the
    // current stack frame the stack pointer would cross the stack limit, so trap.
let cflags = pos.ins().ifcmp_sp(sp_threshold);
pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
pos.ins().trapif(
IntCC::UnsignedGreaterThanOrEqual,
cflags,
ir::TrapCode::StackOverflow,
);
}
/// Find all `return` instructions and insert epilogues before them.
fn insert_common_epilogues(
pos: &mut EncCursor,
stack_size: i64,
reg_type: ir::types::Type,
csrs: &RegisterSet,
) {
while let Some(ebb) = pos.next_ebb() {
pos.goto_last_inst(ebb);
if let Some(inst) = pos.current_inst() {
if pos.func.dfg[inst].opcode().is_return() {
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs);
}
}
}
}
/// Insert an epilogue given a specific `return` instruction.
/// This is used by common calling conventions such as System V.
fn insert_common_epilogue(
inst: ir::Inst,
stack_size: i64,
pos: &mut EncCursor,
reg_type: ir::types::Type,
csrs: &RegisterSet,
) {
if stack_size > 0 {
pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
}
// Pop all the callee-saved registers, stepping backward each time to
// preserve the correct order.
let fp_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst();
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.func.dfg.append_inst_arg(inst, fp_ret);
for reg in csrs.iter(GPR) {
let csr_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst();
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
pos.func.dfg.append_inst_arg(inst, csr_ret);
}
}


@@ -0,0 +1,342 @@
//! Emitting binary x86 machine code.
use super::enc_tables::{needs_offset, needs_sib_byte};
use super::registers::RU;
use crate::binemit::{bad_encoding, CodeSink, Reloc};
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
use crate::ir::{Ebb, Function, Inst, InstructionData, JumpTable, Opcode, TrapCode};
use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef};
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
// Convert a stack base to the corresponding register.
fn stk_base(base: StackBase) -> RegUnit {
let ru = match base {
StackBase::SP => RU::rsp,
StackBase::FP => RU::rbp,
StackBase::Zone => unimplemented!(),
};
ru as RegUnit
}
// Mandatory prefix bytes for Mp* opcodes.
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
// A REX prefix with no bits set: 0b0100WRXB.
const BASE_REX: u8 = 0b0100_0000;
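// For example, 0x49 = REX.W + REX.B selects 64-bit operands and an r8-r15 r/m register.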
// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
// This is used for instructions that encode a register in the low 3 bits of the opcode and for
// instructions that use the ModR/M `reg` field for something else.
fn rex1(reg_b: RegUnit) -> u8 {
let b = ((reg_b >> 3) & 1) as u8;
BASE_REX | b
}
// Create a dual-register REX prefix, setting:
//
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
// REX.R = bit 3 of reg register.
fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
let b = ((rm >> 3) & 1) as u8;
let r = ((reg >> 3) & 1) as u8;
BASE_REX | b | (r << 2)
}
// Create a three-register REX prefix, setting:
//
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
// REX.R = bit 3 of reg register.
// REX.X = bit 3 of SIB index register.
fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
let b = ((rm >> 3) & 1) as u8;
let r = ((reg >> 3) & 1) as u8;
let x = ((index >> 3) & 1) as u8;
BASE_REX | b | (x << 1) | (r << 2)
}
// Emit a REX prefix.
//
// The R, X, and B bits are computed from registers using the functions above. The W bit is
// extracted from `bits`.
fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(rex & 0xf8, BASE_REX);
let w = ((bits >> 15) & 1) as u8;
sink.put1(rex | (w << 3));
}
// Emit a single-byte opcode with no REX prefix.
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
sink.put1(bits as u8);
}
// Emit a single-byte opcode with REX prefix.
fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
rex_prefix(bits, rex, sink);
sink.put1(bits as u8);
}
// Emit two-byte opcode: 0F XX
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit two-byte opcode: 0F XX with REX prefix.
fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
rex_prefix(bits, rex, sink);
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit single-byte opcode with mandatory prefix.
fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
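    // Bits 8-9 (pp) select the mandatory prefix: 1 => 0x66, 2 => 0xf3, 3 => 0xf2.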
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
sink.put1(bits as u8);
}
// Emit single-byte opcode with mandatory prefix and REX.
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
rex_prefix(bits, rex, sink);
sink.put1(bits as u8);
}
// Emit two-byte opcode (0F XX) with mandatory prefix.
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
rex_prefix(bits, rex, sink);
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
let mm = (bits >> 10) & 3;
sink.put1(0x0f);
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
sink.put1(bits as u8);
}
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
rex_prefix(bits, rex, sink);
let mm = (bits >> 10) & 3;
sink.put1(0x0f);
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
sink.put1(bits as u8);
}
/// Emit a ModR/M byte for reg-reg operands.
fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b11000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a ModR/M byte where the reg bits are part of the opcode.
fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
let reg = (bits >> 12) as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b11000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
/// Registers %rsp and %rbp are invalid for `rm`: %rsp indicates a SIB byte, and %rbp indicates an
/// absolute immediate 32-bit address.
fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b00000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 00 ModR/M byte with a RIP-relative displacement in 64-bit mode. The effective
/// address is calculated by adding the displacement to the 64-bit RIP of the next instruction.
/// See the Intel Software Developer's Manual, Section 2.2.1.6.
fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_rm(0b101, reg, sink)
}
/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
/// displacement.
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b01000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
/// displacement.
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b10000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_rm(0b100, reg, sink);
}
/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
/// displacement are present.
fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp8(0b100, reg, sink);
}
/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
/// displacement are present.
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp32(0b100, reg, sink);
}
/// Emit a SIB byte with a base register and no scale+index.
fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
let base = base as u8 & 7;
// SIB SS_III_BBB.
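    // An index field of 0b100 means no index register.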
let mut b = 0b00_100_000;
b |= base;
sink.put1(b);
}
/// Emit a SIB byte with a scale, base, and index.
fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
// SIB SS_III_BBB.
debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
let scale = scale & 3;
let index = index as u8 & 7;
let base = base as u8 & 7;
let b: u8 = (scale << 6) | (index << 3) | base;
sink.put1(b);
}
/// Get the low 4 bits of an opcode for an integer condition code.
///
/// Add this offset to a base opcode for:
///
/// ---- 0x70: Short conditional branch.
/// 0x0f 0x80: Long conditional branch.
/// 0x0f 0x90: SetCC.
///
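/// For example, `Equal` maps to 0x4, so the long conditional branch on equality is encoded as
/// 0x0f 0x84 (`je rel32`).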
fn icc2opc(cond: IntCC) -> u16 {
use crate::ir::condcodes::IntCC::*;
match cond {
// 0x0 = Overflow.
// 0x1 = !Overflow.
UnsignedLessThan => 0x2,
UnsignedGreaterThanOrEqual => 0x3,
Equal => 0x4,
NotEqual => 0x5,
UnsignedLessThanOrEqual => 0x6,
UnsignedGreaterThan => 0x7,
// 0x8 = Sign.
// 0x9 = !Sign.
// 0xa = Parity even.
// 0xb = Parity odd.
SignedLessThan => 0xc,
SignedGreaterThanOrEqual => 0xd,
SignedLessThanOrEqual => 0xe,
SignedGreaterThan => 0xf,
}
}
/// Get the low 4 bits of an opcode for a floating point condition code.
///
/// The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
///
/// ZPC OSA
/// UN 111 000
/// GT 000 000
/// LT 001 000
/// EQ 100 000
///
/// Not all floating point condition codes are supported.
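/// For example, `GreaterThan` maps to 0x7, giving the SetCC encoding 0x0f 0x97 (`seta`).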
fn fcc2opc(cond: FloatCC) -> u16 {
use crate::ir::condcodes::FloatCC::*;
match cond {
Ordered => 0xb, // EQ|LT|GT => *np (P=0)
Unordered => 0xa, // UN => *p (P=1)
OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0),
UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
GreaterThan => 0x7, // GT => *a (C=0&Z=0)
GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
Equal | // EQ
NotEqual | // UN|LT|GT
LessThan | // LT
LessThanOrEqual | // LT|EQ
UnorderedOrGreaterThan | // UN|GT
UnorderedOrGreaterThanOrEqual // UN|GT|EQ
=> panic!("{} not supported", cond),
}
}
/// Emit a single-byte branch displacement to `destination`.
fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
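    // The branch displacement is relative to the end of the instruction; `sink.offset() + 1`
    // accounts for the displacement byte itself.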
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
sink.put1(delta as u8);
}
/// Emit a four-byte branch displacement to `destination`.
fn disp4<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
sink.put4(delta);
}
/// Emit a four-byte displacement to jump table `jt`.
fn jt_disp4<CS: CodeSink + ?Sized>(jt: JumpTable, func: &Function, sink: &mut CS) {
let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4);
sink.put4(delta);
}


@@ -0,0 +1,778 @@
//! Encoding tables for x86 ISAs.
use super::registers::*;
use crate::bitset::BitSet;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::{self, Function, Inst, InstBuilder};
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::base_size;
use crate::isa::encoding::RecipeSizing;
use crate::isa::RegUnit;
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
pub fn needs_sib_byte(reg: RegUnit) -> bool {
reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit
}
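// %rbp and %r13 have 0b101 in their low three bits; in ModR/M mode 00 that encoding means disp32
// or RIP-relative, so they need an explicit zero displacement when used as a base register.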
pub fn needs_offset(reg: RegUnit) -> bool {
reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit
}
pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool {
needs_sib_byte(reg) || needs_offset(reg)
}
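// Helper for the size_plus_* functions below: returns 1 if the address register used by input
// operand `op_index` satisfies `condition_func`, adding a byte to the instruction size.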
fn additional_size_if(
op_index: usize,
inst: Inst,
divert: &RegDiversions,
func: &Function,
condition_func: fn(RegUnit) -> bool,
) -> u8 {
let addr_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
if condition_func(addr_reg) {
1
} else {
0
}
}
fn size_plus_maybe_offset_for_in_reg_0(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(0, inst, divert, func, needs_offset)
}
fn size_plus_maybe_offset_for_in_reg_1(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(1, inst, divert, func, needs_offset)
}
fn size_plus_maybe_sib_for_in_reg_0(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte)
}
fn size_plus_maybe_sib_for_in_reg_1(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte)
}
fn size_plus_maybe_sib_or_offset_for_in_reg_0(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte_or_offset)
}
fn size_plus_maybe_sib_or_offset_for_in_reg_1(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte_or_offset)
}
/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
fn expand_sdivrem(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &isa::TargetIsa,
) {
let (x, y, is_srem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Sdiv,
args,
} => (args[0], args[1], false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Srem,
args,
} => (args[0], args[1], true),
_ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
};
let avoid_div_traps = isa.flags().avoid_div_traps();
let old_ebb = func.layout.pp_ebb(inst);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.func.dfg.clear_results(inst);
// If we can tolerate native division traps, sdiv doesn't need branching.
if !avoid_div_traps && !is_srem {
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
pos.remove_inst();
return;
}
// EBB handling the -1 divisor case.
let minus_one = pos.func.dfg.make_ebb();
// Final EBB with one argument representing the final result value.
let done = pos.func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
pos.func.dfg.attach_ebb_param(done, result);
// Start by checking for a -1 divisor which needs to be handled specially.
let is_m1 = pos.ins().ifcmp_imm(y, -1);
pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
// Put in an explicit division-by-zero trap if the environment requires it.
if avoid_div_traps {
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
}
// Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
// by zero.
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
let divres = if is_srem { rem } else { quot };
pos.ins().jump(done, &[divres]);
// Now deal with the -1 divisor case.
pos.insert_ebb(minus_one);
let m1_result = if is_srem {
// x % -1 = 0.
pos.ins().iconst(ty, 0)
} else {
// Explicitly check for overflow: Trap when x == INT_MIN.
debug_assert!(avoid_div_traps, "Native trapping divide handled above");
let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
pos.ins()
.trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow);
// x / -1 = -x.
pos.ins().irsub_imm(x, 0)
};
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[m1_result]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, minus_one);
cfg.recompute_ebb(pos.func, done);
}
/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
fn expand_udivrem(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &isa::TargetIsa,
) {
let (x, y, is_urem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Udiv,
args,
} => (args[0], args[1], false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Urem,
args,
} => (args[0], args[1], true),
_ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
};
let avoid_div_traps = isa.flags().avoid_div_traps();
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.func.dfg.clear_results(inst);
// Put in an explicit division-by-zero trap if the environment requires it.
if avoid_div_traps {
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
}
// Now it is safe to execute the `x86_udivmodx` instruction.
let xhi = pos.ins().iconst(ty, 0);
let reuse = if is_urem {
[None, Some(result)]
} else {
[Some(result), None]
};
pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
pos.remove_inst();
}
/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax`
/// instructions.
fn expand_minmax(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::FloatCC;
let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmin,
args,
} => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmax,
args,
} => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
// We need to handle the following conditions, depending on how x and y compare:
//
// 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
// 2. EQ: We need to use `bitwise_opc` to make sure that
// fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
// 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
// EBB handling case 3) where one operand is NaN.
let uno_ebb = func.dfg.make_ebb();
// EBB that handles the unordered or equal cases 2) and 3).
let ueq_ebb = func.dfg.make_ebb();
// Final EBB with one argument representing the final result value.
let done = func.dfg.make_ebb();
// The basic blocks are laid out to minimize branching for the common cases:
//
// 1) One branch not taken, one jump.
// 2) One branch taken.
// 3) Two branches taken, one jump.
// Move the `inst` result value onto the `done` EBB.
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done, result);
// Test for case 1) ordered and not equal.
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);
// Handle the common ordered, not equal (LT|GT) case.
let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
let one_result = pos.func.dfg.first_result(one_inst);
pos.ins().jump(done, &[one_result]);
// Case 3) Unordered.
// We know that at least one operand is a NaN that needs to be propagated. We simply use an
// `fadd` instruction which has the same NaN propagation semantics.
pos.insert_ebb(uno_ebb);
let uno_result = pos.ins().fadd(x, y);
pos.ins().jump(done, &[uno_result]);
// Case 2) or 3).
pos.insert_ebb(ueq_ebb);
// Test for case 3) (UN) one value is NaN.
// TODO: When we get support for flag values, we can reuse the above comparison.
let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
pos.ins().brnz(cmp_uno, uno_ebb, &[]);
// We are now in case 2) where x and y compare EQ.
// We need a bitwise operation to get the sign right.
let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
let bw_result = pos.func.dfg.first_result(bw_inst);
// This should become a fall-through for this second most common case.
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[bw_result]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, ueq_ebb);
cfg.recompute_ebb(pos.func, uno_ebb);
cfg.recompute_ebb(pos.func, done);
}
/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
/// i64 with a pattern; the rest needs more code.
fn expand_fcvt_from_uint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::IntCC;
let x;
match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtFromUint,
arg,
} => x = arg,
_ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
}
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Conversion from unsigned 32-bit is easy on x86-64.
// TODO: This should be guarded by an ISA check.
if xty == ir::types::I32 {
let wide = pos.ins().uextend(ir::types::I64, x);
pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
return;
}
let old_ebb = pos.func.layout.pp_ebb(inst);
// EBB handling the case where x < 0.
let neg_ebb = pos.func.dfg.make_ebb();
// Final EBB with one argument representing the final result value.
let done = pos.func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
pos.func.dfg.clear_results(inst);
pos.func.dfg.attach_ebb_param(done, result);
// If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
pos.ins().brnz(is_neg, neg_ebb, &[]);
// Easy case: just use a signed conversion.
let posres = pos.ins().fcvt_from_sint(ty, x);
pos.ins().jump(done, &[posres]);
// Now handle the negative case.
pos.insert_ebb(neg_ebb);
// Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
// back up on the FP side.
let ihalf = pos.ins().ushr_imm(x, 1);
let lsb = pos.ins().band_imm(x, 1);
let ifinal = pos.ins().bor(ihalf, lsb);
let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
let negres = pos.ins().fadd(fhalf, fhalf);
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[negres]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, neg_ebb);
cfg.recompute_ebb(pos.func, done);
}
fn expand_fcvt_to_sint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSint,
arg,
} => arg,
_ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// Final EBB after the bad value checks.
let done = func.dfg.make_ebb();
// The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
// It produces an INT_MIN result instead.
func.dfg.replace(inst).x86_cvtt2si(ty, x);
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
let is_done = pos
.ins()
.icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1));
pos.ins().brnz(is_done, done, &[]);
// We now have the following possibilities:
//
// 1. INT_MIN was actually the correct conversion result.
// 2. The input was NaN -> trap bad_toint
// 3. The input was out of range -> trap int_ovf
//
// Check for NaN.
let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
pos.ins()
.trapnz(is_nan, ir::TrapCode::BadConversionToInteger);
// Check for case 1: INT_MIN is the correct result.
// Determine the smallest floating point number that would convert to INT_MIN.
let mut overflow_cc = FloatCC::LessThan;
let output_bits = ty.lane_bits();
let flimit = match xty {
ir::types::F32 =>
// An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f32const(if output_bits < 32 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee32::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee32::pow2(output_bits - 1).neg()
})
}
ir::types::F64 =>
// An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f64const(if output_bits < 64 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee64::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee64::pow2(output_bits - 1).neg()
})
}
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
// Finally, we could have a positive value that is too large.
let fzero = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
pos.ins().jump(done, &[]);
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, done);
}
fn expand_fcvt_to_sint_sat(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSintSat,
arg,
} => arg,
_ => panic!(
"Need fcvt_to_sint_sat: {}",
func.dfg.display_inst(inst, None)
),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// Final EBB after the bad value checks.
let done_ebb = func.dfg.make_ebb();
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done_ebb, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or
// overflow. It produces an INT_MIN result instead.
let cvtt2si = pos.ins().x86_cvtt2si(ty, x);
let is_done = pos
.ins()
.icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1));
pos.ins().brnz(is_done, done_ebb, &[cvtt2si]);
// We now have the following possibilities:
//
// 1. INT_MIN was actually the correct conversion result.
// 2. The input was NaN -> replace the result value with 0.
// 3. The input was out of range -> saturate the result to the min/max value.
// Check for NaN, which is truncated to 0.
let zero = pos.ins().iconst(ty, 0);
let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
pos.ins().brnz(is_nan, done_ebb, &[zero]);
// Check for case 1: INT_MIN is the correct result.
// Determine the smallest floating point number that would convert to INT_MIN.
let mut overflow_cc = FloatCC::LessThan;
let output_bits = ty.lane_bits();
let flimit = match xty {
ir::types::F32 =>
// An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f32const(if output_bits < 32 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee32::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee32::pow2(output_bits - 1).neg()
})
}
ir::types::F64 =>
// An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f64const(if output_bits < 64 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee64::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee64::pow2(output_bits - 1).neg()
})
}
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
let min_imm = match ty {
ir::types::I32 => i32::min_value() as i64,
ir::types::I64 => i64::min_value(),
_ => panic!("Don't know the min value for {}", ty),
};
let min_value = pos.ins().iconst(ty, min_imm);
pos.ins().brnz(overflow, done_ebb, &[min_value]);
// Finally, we could have a positive value that is too large.
let fzero = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
_ => panic!("Can't convert {}", xty),
};
let max_imm = match ty {
ir::types::I32 => i32::max_value() as i64,
ir::types::I64 => i64::max_value(),
_ => panic!("Don't know the max value for {}", ty),
};
let max_value = pos.ins().iconst(ty, max_imm);
let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
pos.ins().brnz(overflow, done_ebb, &[max_value]);
// Recycle the original instruction.
pos.func.dfg.replace(inst).jump(done_ebb, &[cvtt2si]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done_ebb);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, done_ebb);
}
fn expand_fcvt_to_uint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToUint,
arg,
} => arg,
_ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// EBB handling numbers >= 2^(N-1).
let large = func.dfg.make_ebb();
// Final EBB after the bad value checks.
let done = func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
// the destination integer type.
let pow2nm1 = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
_ => panic!("Can't convert {}", xty),
};
let is_large = pos.ins().ffcmp(x, pow2nm1);
pos.ins()
.brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
// We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
// previous comparison.
pos.ins().trapff(
FloatCC::Unordered,
is_large,
ir::TrapCode::BadConversionToInteger,
);
// Now we know that x < 2^(N-1) and not NaN.
let sres = pos.ins().x86_cvtt2si(ty, x);
let is_neg = pos.ins().ifcmp_imm(sres, 0);
pos.ins()
.brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
pos.ins().trap(ir::TrapCode::IntegerOverflow);
// Handle the case where x >= 2^(N-1) and not NaN.
pos.insert_ebb(large);
let adjx = pos.ins().fsub(x, pow2nm1);
let lres = pos.ins().x86_cvtt2si(ty, adjx);
let is_neg = pos.ins().ifcmp_imm(lres, 0);
pos.ins()
.trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow);
let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[lfinal]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, large);
cfg.recompute_ebb(pos.func, done);
}
fn expand_fcvt_to_uint_sat(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToUintSat,
arg,
} => arg,
_ => panic!(
"Need fcvt_to_uint_sat: {}",
func.dfg.display_inst(inst, None)
),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// EBB handling numbers >= 2^(N-1).
let large = func.dfg.make_ebb();
// Final EBB after the bad value checks.
let done = func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
// the destination integer type.
let pow2nm1 = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
_ => panic!("Can't convert {}", xty),
};
let zero = pos.ins().iconst(ty, 0);
let is_large = pos.ins().ffcmp(x, pow2nm1);
pos.ins()
.brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
// We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison.
pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);
    // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is non-negative,
    // we're done; otherwise saturate to the minimum unsigned value, that is 0.
let sres = pos.ins().x86_cvtt2si(ty, x);
let is_neg = pos.ins().ifcmp_imm(sres, 0);
pos.ins()
.brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
pos.ins().jump(done, &[zero]);
// Handle the case where x >= 2^(N-1) and not NaN.
pos.insert_ebb(large);
let adjx = pos.ins().fsub(x, pow2nm1);
let lres = pos.ins().x86_cvtt2si(ty, adjx);
let max_value = pos.ins().iconst(
ty,
match ty {
ir::types::I32 => u32::max_value() as i64,
ir::types::I64 => u64::max_value() as i64,
_ => panic!("Can't convert {}", ty),
},
);
let is_neg = pos.ins().ifcmp_imm(lres, 0);
pos.ins()
.brif(IntCC::SignedLessThan, is_neg, done, &[max_value]);
let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[lfinal]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, large);
cfg.recompute_ebb(pos.func, done);
}


@@ -0,0 +1,145 @@
//! x86 Instruction Set Architectures.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use crate::result::CodegenResult;
use crate::timing;
use core::fmt;
use std::boxed::Box;
use target_lexicon::{PointerWidth, Triple};
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
/// Get an ISA builder for creating x86 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
let level1 = match triple.pointer_width().unwrap() {
PointerWidth::U16 => unimplemented!("x86-16"),
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
};
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
cpumode: level1,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"x86"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn uses_cpu_flags(&self) -> bool {
true
}
fn uses_complex_addresses(&self) -> bool {
true
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
self.cpumode,
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.triple, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func, &self.triple)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
let _tt = timing::prologue_epilogue();
abi::prologue_epilogue(func, self)
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}


@@ -0,0 +1,63 @@
//! x86 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
#[cfg(test)]
mod tests {
use super::*;
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
// The encoding of integer registers is not alphabetical.
assert_eq!(INFO.parse_regunit("rax"), Some(0));
assert_eq!(INFO.parse_regunit("rbx"), Some(3));
assert_eq!(INFO.parse_regunit("rcx"), Some(1));
assert_eq!(INFO.parse_regunit("rdx"), Some(2));
assert_eq!(INFO.parse_regunit("rsi"), Some(6));
assert_eq!(INFO.parse_regunit("rdi"), Some(7));
assert_eq!(INFO.parse_regunit("rbp"), Some(5));
assert_eq!(INFO.parse_regunit("rsp"), Some(4));
assert_eq!(INFO.parse_regunit("r8"), Some(8));
assert_eq!(INFO.parse_regunit("r15"), Some(15));
assert_eq!(INFO.parse_regunit("xmm0"), Some(16));
assert_eq!(INFO.parse_regunit("xmm15"), Some(31));
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%rax");
assert_eq!(uname(3), "%rbx");
assert_eq!(uname(1), "%rcx");
assert_eq!(uname(2), "%rdx");
assert_eq!(uname(6), "%rsi");
assert_eq!(uname(7), "%rdi");
assert_eq!(uname(5), "%rbp");
assert_eq!(uname(4), "%rsp");
assert_eq!(uname(8), "%r8");
assert_eq!(uname(15), "%r15");
assert_eq!(uname(16), "%xmm0");
assert_eq!(uname(31), "%xmm15");
}
#[test]
fn regclasses() {
assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
assert_eq!(GPR.intersect_index(FPR), None);
assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
assert_eq!(ABCD.intersect_index(FPR), None);
assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
assert_eq!(FPR.intersect_index(GPR), None);
assert_eq!(FPR.intersect_index(ABCD), None);
}
}


@@ -0,0 +1,52 @@
//! x86 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/x86/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
#[cfg(test)]
mod tests {
use super::{builder, Flags};
use crate::settings::{self, Configurable};
#[test]
fn presets() {
let shared = settings::Flags::new(settings::builder());
// Nehalem has SSE4.1 but not BMI1.
let mut b0 = builder();
b0.enable("nehalem").unwrap();
let f0 = Flags::new(&shared, b0);
assert_eq!(f0.has_sse41(), true);
assert_eq!(f0.has_bmi1(), false);
let mut b1 = builder();
b1.enable("haswell").unwrap();
let f1 = Flags::new(&shared, b1);
assert_eq!(f1.has_sse41(), true);
assert_eq!(f1.has_bmi1(), true);
}
#[test]
fn display_presets() {
// Spot check that the flags Display impl does not cause a panic
let shared = settings::Flags::new(settings::builder());
let b0 = builder();
let f0 = Flags::new(&shared, b0);
let _ = format!("{}", f0);
let mut b1 = builder();
b1.enable("nehalem").unwrap();
let f1 = Flags::new(&shared, b1);
let _ = format!("{}", f1);
let mut b2 = builder();
b2.enable("haswell").unwrap();
let f2 = Flags::new(&shared, b2);
let _ = format!("{}", f2);
}
}


@@ -0,0 +1,93 @@
//! Iterator utilities.
/// Extra methods for iterators.
pub trait IteratorExtras: Iterator {
/// Create an iterator that produces adjacent pairs of elements from the iterator.
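    ///
    /// For example, `[1, 2, 3]` yields `(1, 2)` and `(2, 3)`.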
fn adjacent_pairs(mut self) -> AdjacentPairs<Self>
where
Self: Sized,
Self::Item: Clone,
{
let elem = self.next();
AdjacentPairs { iter: self, elem }
}
}
impl<T> IteratorExtras for T where T: Iterator {}
/// Adjacent pairs iterator returned by `adjacent_pairs()`.
///
/// This wraps another iterator and produces a sequence of adjacent pairs of elements.
pub struct AdjacentPairs<I>
where
I: Iterator,
I::Item: Clone,
{
iter: I,
elem: Option<I::Item>,
}
impl<I> Iterator for AdjacentPairs<I>
where
I: Iterator,
I::Item: Clone,
{
type Item = (I::Item, I::Item);
fn next(&mut self) -> Option<Self::Item> {
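        // Emit the (current, next) pair and keep `next` as the new current element.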
self.elem.take().and_then(|e| {
self.elem = self.iter.next();
self.elem.clone().map(|n| (e, n))
})
}
}
#[cfg(test)]
mod tests {
use std::vec::Vec;
#[test]
fn adjpairs() {
use super::IteratorExtras;
assert_eq!(
[1, 2, 3, 4]
.iter()
.cloned()
.adjacent_pairs()
.collect::<Vec<_>>(),
vec![(1, 2), (2, 3), (3, 4)]
);
assert_eq!(
[2, 3, 4]
.iter()
.cloned()
.adjacent_pairs()
.collect::<Vec<_>>(),
vec![(2, 3), (3, 4)]
);
assert_eq!(
[3, 4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
vec![(3, 4)]
);
assert_eq!(
[4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
vec![]
);
assert_eq!(
[].iter()
.cloned()
.adjacent_pairs()
.collect::<Vec<(i32, i32)>>(),
vec![]
);
}
}


@@ -0,0 +1,716 @@
//! Legalize ABI boundaries.
//!
//! This legalizer sub-module contains code for dealing with ABI boundaries:
//!
//! - Function arguments passed to the entry block.
//! - Function arguments passed to call instructions.
//! - Return values from call instructions.
//! - Return values passed to return instructions.
//!
//! The ABI boundary legalization happens in two phases:
//!
//! 1. The `legalize_signatures` function rewrites all the preamble signatures with ABI information
//! and possibly new argument types. It also rewrites the entry block arguments to match.
//! 2. The `handle_call_abi` and `handle_return_abi` functions rewrite call and return instructions
//! to match the new ABI signatures.
//!
//! Between the two phases, preamble signatures and call/return arguments don't match. This
//! intermediate state doesn't type check.
use crate::abi::{legalize_abi_value, ValueConversion};
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::instructions::CallInfo;
use crate::ir::{
AbiParam, ArgumentLoc, ArgumentPurpose, DataFlowGraph, Ebb, Function, Inst, InstBuilder,
SigRef, Signature, Type, Value, ValueLoc,
};
use crate::isa::TargetIsa;
use crate::legalizer::split::{isplit, vsplit};
use log::debug;
use std::vec::Vec;
/// Legalize all the function signatures in `func`.
///
/// This changes all signatures to be ABI-compliant with full `ArgumentLoc` annotations. It doesn't
/// change the entry block arguments, calls, or return instructions, so this can leave the function
/// in a state with type discrepancies.
pub fn legalize_signatures(func: &mut Function, isa: &TargetIsa) {
legalize_signature(&mut func.signature, true, isa);
for sig_data in func.dfg.signatures.values_mut() {
legalize_signature(sig_data, false, isa);
}
if let Some(entry) = func.layout.entry_block() {
legalize_entry_params(func, entry);
spill_entry_params(func, entry);
}
}
/// Legalize the libcall signature, which we may generate on the fly after
/// `legalize_signatures` has been called.
pub fn legalize_libcall_signature(signature: &mut Signature, isa: &TargetIsa) {
legalize_signature(signature, false, isa);
}
/// Legalize the given signature.
///
/// `current` is true if this is the signature for the current function.
fn legalize_signature(signature: &mut Signature, current: bool, isa: &TargetIsa) {
isa.legalize_signature(signature, current);
}
/// Legalize the entry block parameters after `func`'s signature has been legalized.
///
/// The legalized signature may contain more parameters than the original signature, and the
/// parameter types have been changed. This function goes through the parameters of the entry EBB
/// and replaces them with parameters of the right type for the ABI.
///
/// The original entry EBB parameters are computed from the new ABI parameters by code inserted at
/// the top of the entry block.
fn legalize_entry_params(func: &mut Function, entry: Ebb) {
let mut has_sret = false;
let mut has_link = false;
let mut has_vmctx = false;
let mut has_sigid = false;
let mut has_stack_limit = false;
// Insert position for argument conversion code.
// We want to insert instructions before the first instruction in the entry block.
// If the entry block is empty, append instructions to it instead.
let mut pos = FuncCursor::new(func).at_first_inst(entry);
// Keep track of the argument types in the ABI-legalized signature.
let mut abi_arg = 0;
// Process the EBB parameters one at a time, possibly replacing one argument with multiple new
// ones. We do this by detaching the entry EBB parameters first.
let ebb_params = pos.func.dfg.detach_ebb_params(entry);
let mut old_arg = 0;
while let Some(arg) = ebb_params.get(old_arg, &pos.func.dfg.value_lists) {
old_arg += 1;
let abi_type = pos.func.signature.params[abi_arg];
let arg_type = pos.func.dfg.value_type(arg);
if arg_type == abi_type.value_type {
// No value translation is necessary, this argument matches the ABI type.
// Just use the original EBB argument value. This is the most common case.
pos.func.dfg.attach_ebb_param(entry, arg);
match abi_type.purpose {
ArgumentPurpose::Normal => {}
ArgumentPurpose::FramePointer => {}
ArgumentPurpose::CalleeSaved => {}
ArgumentPurpose::StructReturn => {
debug_assert!(!has_sret, "Multiple sret arguments found");
has_sret = true;
}
ArgumentPurpose::VMContext => {
debug_assert!(!has_vmctx, "Multiple vmctx arguments found");
has_vmctx = true;
}
ArgumentPurpose::SignatureId => {
debug_assert!(!has_sigid, "Multiple sigid arguments found");
has_sigid = true;
}
ArgumentPurpose::StackLimit => {
debug_assert!(!has_stack_limit, "Multiple stack_limit arguments found");
has_stack_limit = true;
}
_ => panic!("Unexpected special-purpose arg {}", abi_type),
}
abi_arg += 1;
} else {
// Compute the value we want for `arg` from the legalized ABI parameters.
let mut get_arg = |func: &mut Function, ty| {
let abi_type = func.signature.params[abi_arg];
debug_assert_eq!(
abi_type.purpose,
ArgumentPurpose::Normal,
"Can't legalize special-purpose argument"
);
if ty == abi_type.value_type {
abi_arg += 1;
Ok(func.dfg.append_ebb_param(entry, ty))
} else {
Err(abi_type)
}
};
let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg);
// The old `arg` is no longer an attached EBB argument, but there are probably still
// uses of the value.
debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
}
}
// The legalized signature may contain additional parameters representing special-purpose
// registers.
for &arg in &pos.func.signature.params[abi_arg..] {
match arg.purpose {
// Any normal parameters should have been processed above.
ArgumentPurpose::Normal => {
panic!("Leftover arg: {}", arg);
}
// The callee-save parameters should not appear until after register allocation is
// done.
ArgumentPurpose::FramePointer | ArgumentPurpose::CalleeSaved => {
panic!("Premature callee-saved arg {}", arg);
}
// These can be meaningfully added by `legalize_signature()`.
ArgumentPurpose::Link => {
debug_assert!(!has_link, "Multiple link parameters found");
has_link = true;
}
ArgumentPurpose::StructReturn => {
debug_assert!(!has_sret, "Multiple sret parameters found");
has_sret = true;
}
ArgumentPurpose::VMContext => {
debug_assert!(!has_vmctx, "Multiple vmctx parameters found");
has_vmctx = true;
}
ArgumentPurpose::SignatureId => {
debug_assert!(!has_sigid, "Multiple sigid parameters found");
has_sigid = true;
}
ArgumentPurpose::StackLimit => {
debug_assert!(!has_stack_limit, "Multiple stack_limit parameters found");
has_stack_limit = true;
}
}
// Just create entry block values to match here. We will use them in `handle_return_abi()`
// below.
pos.func.dfg.append_ebb_param(entry, arg.value_type);
}
}
/// Legalize the results returned from a call instruction to match the ABI signature.
///
/// The cursor `pos` points to a call instruction with at least one return value. The cursor will
/// be left pointing after the instructions inserted to convert the return values.
///
/// This function is very similar to the `legalize_entry_params` function above.
///
/// Returns the possibly new instruction representing the call.
fn legalize_inst_results<ResType>(pos: &mut FuncCursor, mut get_abi_type: ResType) -> Inst
where
ResType: FnMut(&Function, usize) -> AbiParam,
{
let call = pos
.current_inst()
.expect("Cursor must point to a call instruction");
// We theoretically allow for call instructions that return a number of fixed results before
// the call return values. In practice, it doesn't happen.
debug_assert_eq!(
pos.func.dfg[call]
.opcode()
.constraints()
.num_fixed_results(),
0,
"Fixed results on calls not supported"
);
let results = pos.func.dfg.detach_results(call);
let mut next_res = 0;
let mut abi_res = 0;
// Point immediately after the call.
pos.next_inst();
while let Some(res) = results.get(next_res, &pos.func.dfg.value_lists) {
next_res += 1;
let res_type = pos.func.dfg.value_type(res);
if res_type == get_abi_type(pos.func, abi_res).value_type {
// No value translation is necessary, this result matches the ABI type.
pos.func.dfg.attach_result(call, res);
abi_res += 1;
} else {
let mut get_res = |func: &mut Function, ty| {
let abi_type = get_abi_type(func, abi_res);
if ty == abi_type.value_type {
let last_res = func.dfg.append_result(call, ty);
abi_res += 1;
Ok(last_res)
} else {
Err(abi_type)
}
};
let v = convert_from_abi(pos, res_type, Some(res), &mut get_res);
debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v);
}
}
call
}
/// Compute the original value of type `ty` from the legalized ABI arguments.
///
/// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an
/// ABI argument. It returns:
///
/// - `Ok(arg)` if the requested type matches the next ABI argument.
/// - `Err(arg_type)` if further conversions are needed from the ABI argument `arg_type`.
///
/// If the `into_result` value is provided, the converted result will be written into that value.
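///
/// For example, if the ABI passed an `i8` as a zero-extended `i32`, requesting `i8` here
/// fetches the `i32` argument and reduces it (a sketch, hypothetical value numbers):
///
/// ```clif
/// v1 = ireduce.i8 v10
/// ```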
fn convert_from_abi<GetArg>(
pos: &mut FuncCursor,
ty: Type,
into_result: Option<Value>,
get_arg: &mut GetArg,
) -> Value
where
GetArg: FnMut(&mut Function, Type) -> Result<Value, AbiParam>,
{
// Terminate the recursion when we get the desired type.
let arg_type = match get_arg(pos.func, ty) {
Ok(v) => {
debug_assert_eq!(pos.func.dfg.value_type(v), ty);
debug_assert_eq!(into_result, None);
return v;
}
Err(t) => t,
};
// Reconstruct how `ty` was legalized into the `arg_type` argument.
let conversion = legalize_abi_value(ty, &arg_type);
debug!("convert_from_abi({}): {:?}", ty, conversion);
// The conversion describes how the value was lowered to the ABI argument; we implement the
// reverse conversion here.
match conversion {
// Construct a `ty` by concatenating two ABI integers.
ValueConversion::IntSplit => {
let abi_ty = ty.half_width().expect("Invalid type for conversion");
let lo = convert_from_abi(pos, abi_ty, None, get_arg);
let hi = convert_from_abi(pos, abi_ty, None, get_arg);
debug!(
"intsplit {}: {}, {}: {}",
lo,
pos.func.dfg.value_type(lo),
hi,
pos.func.dfg.value_type(hi)
);
pos.ins().with_results([into_result]).iconcat(lo, hi)
}
// Construct a `ty` by concatenating two halves of a vector.
ValueConversion::VectorSplit => {
let abi_ty = ty.half_vector().expect("Invalid type for conversion");
let lo = convert_from_abi(pos, abi_ty, None, get_arg);
let hi = convert_from_abi(pos, abi_ty, None, get_arg);
pos.ins().with_results([into_result]).vconcat(lo, hi)
}
// Construct a `ty` by bit-casting from an integer type.
ValueConversion::IntBits => {
debug_assert!(!ty.is_int());
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
pos.ins().with_results([into_result]).bitcast(ty, arg)
}
// ABI argument is a sign-extended version of the value we want.
ValueConversion::Sext(abi_ty) => {
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
// TODO: Currently, we don't take advantage of the ABI argument being sign-extended.
// We could insert an `assert_sreduce` which would fold with a following `sextend` of
// this value.
pos.ins().with_results([into_result]).ireduce(ty, arg)
}
ValueConversion::Uext(abi_ty) => {
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
// TODO: Currently, we don't take advantage of the ABI argument being zero-extended.
// We could insert an `assert_ureduce` which would fold with a following `uextend` of
// this value.
pos.ins().with_results([into_result]).ireduce(ty, arg)
}
}
}
/// Convert `value` to match an ABI signature by inserting instructions at `pos`.
///
/// This may require expanding the value to multiple ABI arguments. The conversion process is
/// recursive and controlled by the `put_arg` closure. When a candidate argument value is presented
/// to the closure, it will perform one of two actions:
///
/// 1. If the suggested argument has an acceptable value type, consume it by adding it to the list
/// of arguments and return `Ok(())`.
/// 2. If the suggested argument doesn't have the right value type, don't change anything, but
/// return the `Err(AbiParam)` that is needed.
///
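/// For example, passing an `i64` value on a 32-bit target splits it into two `i32` ABI
/// arguments (a sketch, hypothetical value numbers):
///
/// ```clif
/// v10, v11 = isplit v1
/// ```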
fn convert_to_abi<PutArg>(
pos: &mut FuncCursor,
cfg: &ControlFlowGraph,
value: Value,
put_arg: &mut PutArg,
) where
PutArg: FnMut(&mut Function, Value) -> Result<(), AbiParam>,
{
// Start by invoking the closure to either terminate the recursion or get the argument type
// we're trying to match.
let arg_type = match put_arg(pos.func, value) {
Ok(_) => return,
Err(t) => t,
};
let ty = pos.func.dfg.value_type(value);
match legalize_abi_value(ty, &arg_type) {
ValueConversion::IntSplit => {
let curpos = pos.position();
let srcloc = pos.srcloc();
let (lo, hi) = isplit(&mut pos.func, cfg, curpos, srcloc, value);
convert_to_abi(pos, cfg, lo, put_arg);
convert_to_abi(pos, cfg, hi, put_arg);
}
ValueConversion::VectorSplit => {
let curpos = pos.position();
let srcloc = pos.srcloc();
let (lo, hi) = vsplit(&mut pos.func, cfg, curpos, srcloc, value);
convert_to_abi(pos, cfg, lo, put_arg);
convert_to_abi(pos, cfg, hi, put_arg);
}
ValueConversion::IntBits => {
debug_assert!(!ty.is_int());
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
let arg = pos.ins().bitcast(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
}
ValueConversion::Sext(abi_ty) => {
let arg = pos.ins().sextend(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
}
ValueConversion::Uext(abi_ty) => {
let arg = pos.ins().uextend(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
}
}
}
/// Check if a sequence of arguments match a desired sequence of argument types.
fn check_arg_types(dfg: &DataFlowGraph, args: &[Value], types: &[AbiParam]) -> bool {
let arg_types = args.iter().map(|&v| dfg.value_type(v));
let sig_types = types.iter().map(|&at| at.value_type);
arg_types.eq(sig_types)
}
/// Check if the arguments of the call `inst` match the signature.
///
/// Returns `Ok(())` if the signature matches and no changes are needed, or `Err(sig_ref)` if the
/// signature doesn't match.
fn check_call_signature(dfg: &DataFlowGraph, inst: Inst) -> Result<(), SigRef> {
// Extract the signature and argument values.
let (sig_ref, args) = match dfg[inst].analyze_call(&dfg.value_lists) {
CallInfo::Direct(func, args) => (dfg.ext_funcs[func].signature, args),
CallInfo::Indirect(sig_ref, args) => (sig_ref, args),
CallInfo::NotACall => panic!("Expected call, got {:?}", dfg[inst]),
};
let sig = &dfg.signatures[sig_ref];
if check_arg_types(dfg, args, &sig.params[..])
&& check_arg_types(dfg, dfg.inst_results(inst), &sig.returns[..])
{
// All types check out.
Ok(())
} else {
// Call types need fixing.
Err(sig_ref)
}
}
/// Check if the arguments of the return `inst` match the signature.
fn check_return_signature(dfg: &DataFlowGraph, inst: Inst, sig: &Signature) -> bool {
check_arg_types(dfg, dfg.inst_variable_args(inst), &sig.returns)
}
/// Insert ABI conversion code for the arguments to the call or return instruction at `pos`.
///
/// - `abi_args` is the number of arguments that the ABI signature requires.
/// - `get_abi_type` is a closure that can provide the desired `AbiParam` for a given ABI
/// argument number in `0..abi_args`.
///
fn legalize_inst_arguments<ArgType>(
pos: &mut FuncCursor,
cfg: &ControlFlowGraph,
abi_args: usize,
mut get_abi_type: ArgType,
) where
ArgType: FnMut(&Function, usize) -> AbiParam,
{
let inst = pos
.current_inst()
.expect("Cursor must point to a call instruction");
// Lift the value list out of the call instruction so we can modify it.
let mut vlist = pos.func.dfg[inst]
.take_value_list()
.expect("Call must have a value list");
// The value list contains all arguments to the instruction, including the callee on an
// indirect call which isn't part of the call arguments that must match the ABI signature.
// Figure out how many fixed values are at the front of the list. We won't touch those.
let num_fixed_values = pos.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let have_args = vlist.len(&pos.func.dfg.value_lists) - num_fixed_values;
// Grow the value list to the right size and shift all the existing arguments to the right.
// This lets us write the new argument values into the list without overwriting the old
// arguments.
//
// Before:
//
// <--> fixed_values
// <-----------> have_args
// [FFFFOOOOOOOOOOOOO]
//
// After grow_at():
//
// <--> fixed_values
// <-----------> have_args
// <------------------> abi_args
// [FFFF-------OOOOOOOOOOOOO]
// ^
// old_arg_offset
//
// After writing the new arguments:
//
// <--> fixed_values
// <------------------> abi_args
// [FFFFNNNNNNNNNNNNNNNNNNNN]
//
vlist.grow_at(
num_fixed_values,
abi_args - have_args,
&mut pos.func.dfg.value_lists,
);
let old_arg_offset = num_fixed_values + abi_args - have_args;
let mut abi_arg = 0;
for old_arg in 0..have_args {
let old_value = vlist
.get(old_arg_offset + old_arg, &pos.func.dfg.value_lists)
.unwrap();
let mut put_arg = |func: &mut Function, arg| {
let abi_type = get_abi_type(func, abi_arg);
if func.dfg.value_type(arg) == abi_type.value_type {
// This is the argument type we need.
vlist.as_mut_slice(&mut func.dfg.value_lists)[num_fixed_values + abi_arg] = arg;
abi_arg += 1;
Ok(())
} else {
Err(abi_type)
}
};
convert_to_abi(pos, cfg, old_value, &mut put_arg);
}
// Put the modified value list back.
pos.func.dfg[inst].put_value_list(vlist);
}
/// Insert ABI conversion code before and after the call instruction at `pos`.
///
/// Instructions inserted before the call will compute the appropriate ABI values for the
/// callee's new ABI-legalized signature. The function call arguments are rewritten in place to
/// match the new signature.
///
/// Instructions will be inserted after the call to convert returned ABI values back to the
/// original return values. The call's result values will be adapted to match the new signature.
///
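/// A sketch on a 32-bit target, where a call taking an `i64` argument is rewritten
/// (hypothetical value numbers):
///
/// ```clif
/// v2 = call fn0(v1)
/// ```
///
/// becomes:
///
/// ```clif
/// v10, v11 = isplit v1
/// v2 = call fn0(v10, v11)
/// ```
///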
/// Returns `true` if any instructions were inserted.
pub fn handle_call_abi(mut inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
let pos = &mut FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start by checking if the argument types already match the signature.
let sig_ref = match check_call_signature(&pos.func.dfg, inst) {
Ok(_) => return spill_call_arguments(pos),
Err(s) => s,
};
// OK, we need to fix the call arguments to match the ABI signature.
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.dfg.signatures[sig_ref].params[abi_arg]
});
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
inst = legalize_inst_results(pos, |func, abi_res| {
func.dfg.signatures[sig_ref].returns[abi_res]
});
}
debug_assert!(
check_call_signature(&pos.func.dfg, inst).is_ok(),
"Signature still wrong: {}, {}{}",
pos.func.dfg.display_inst(inst, None),
sig_ref,
pos.func.dfg.signatures[sig_ref]
);
// Go back and insert spills for any stack arguments.
pos.goto_inst(inst);
spill_call_arguments(pos);
// Yes, we changed stuff.
true
}
/// Insert ABI conversion code before and after the return instruction at `inst`.
///
/// Return `true` if any instructions were inserted.
pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
// Check if the returned types already match the signature.
if check_return_signature(&func.dfg, inst, &func.signature) {
return false;
}
// Count the special-purpose return values (`link`, `sret`, and `vmctx`) that were appended to
// the legalized signature.
let special_args = func
.signature
.returns
.iter()
.rev()
.take_while(|&rt| {
rt.purpose == ArgumentPurpose::Link
|| rt.purpose == ArgumentPurpose::StructReturn
|| rt.purpose == ArgumentPurpose::VMContext
})
.count();
let abi_args = func.signature.returns.len() - special_args;
let pos = &mut FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.signature.returns[abi_arg]
});
debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
// the legalized signature. These values should simply be propagated from the entry block
// arguments.
if special_args > 0 {
debug!(
"Adding {} special-purpose arguments to {}",
special_args,
pos.func.dfg.display_inst(inst, None)
);
let mut vlist = pos.func.dfg[inst].take_value_list().unwrap();
for arg in &pos.func.signature.returns[abi_args..] {
match arg.purpose {
ArgumentPurpose::Link
| ArgumentPurpose::StructReturn
| ArgumentPurpose::VMContext => {}
ArgumentPurpose::Normal => panic!("unexpected return value {}", arg),
_ => panic!("Unsupported special purpose return value {}", arg),
}
// A `link`/`sret`/`vmctx` return value can only appear in a signature that has a
// unique matching argument. They are appended at the end, so search the signature from
// the end.
let idx = pos
.func
.signature
.params
.iter()
.rposition(|t| t.purpose == arg.purpose)
.expect("No matching special purpose argument.");
// Get the corresponding entry block value and add it to the return instruction's
// arguments.
let val = pos
.func
.dfg
.ebb_params(pos.func.layout.entry_block().unwrap())[idx];
debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type);
vlist.push(val, &mut pos.func.dfg.value_lists);
}
pos.func.dfg[inst].put_value_list(vlist);
}
debug_assert!(
check_return_signature(&pos.func.dfg, inst, &pos.func.signature),
"Signature still wrong: {} / signature {}",
pos.func.dfg.display_inst(inst, None),
pos.func.signature
);
// Yes, we changed stuff.
true
}
/// Assign stack slots to incoming function parameters on the stack.
///
/// Values that are passed into the function on the stack must be assigned to an `IncomingArg`
/// stack slot already during legalization.
fn spill_entry_params(func: &mut Function, entry: Ebb) {
for (abi, &arg) in func.signature.params.iter().zip(func.dfg.ebb_params(entry)) {
if let ArgumentLoc::Stack(offset) = abi.location {
let ss = func.stack_slots.make_incoming_arg(abi.value_type, offset);
func.locations[arg] = ValueLoc::Stack(ss);
}
}
}
/// Assign stack slots to outgoing function arguments on the stack.
///
/// Values that are passed to a called function on the stack must be assigned to a matching
/// `OutgoingArg` stack slot. The assignment must happen immediately before the call.
///
/// TODO: The outgoing stack slots can be written a bit earlier, as long as there are no branches
/// or calls between writing the stack slots and the call instruction. Writing the slots earlier
/// could help reduce register pressure before the call.
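///
/// A sketch, assuming an ABI that passes the second argument on the stack (hypothetical
/// value numbers):
///
/// ```clif
/// v2 = spill v1
/// call fn0(v0, v2)
/// ```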
fn spill_call_arguments(pos: &mut FuncCursor) -> bool {
let inst = pos
.current_inst()
.expect("Cursor must point to a call instruction");
let sig_ref = pos
.func
.dfg
.call_signature(inst)
.expect("Call instruction expected.");
// Start by building a list of stack slots and arguments to be replaced.
// This requires borrowing `pos.func.dfg`, so we can't change anything.
let arglist = {
let locations = &pos.func.locations;
let stack_slots = &mut pos.func.stack_slots;
pos.func
.dfg
.inst_variable_args(inst)
.iter()
.zip(&pos.func.dfg.signatures[sig_ref].params)
.enumerate()
.filter_map(|(idx, (&arg, abi))| {
match abi.location {
ArgumentLoc::Stack(offset) => {
// Assign `arg` to a new stack slot, unless it's already in the correct
// slot. The legalization needs to be idempotent, so we should see a
// correct outgoing slot on the second pass.
let ss = stack_slots.get_outgoing_arg(abi.value_type, offset);
if locations[arg] != ValueLoc::Stack(ss) {
Some((idx, arg, ss))
} else {
None
}
}
_ => None,
}
})
.collect::<Vec<_>>()
};
if arglist.is_empty() {
return false;
}
// Insert the spill instructions and rewrite call arguments.
for (idx, arg, ss) in arglist {
let stack_val = pos.ins().spill(arg);
pos.func.locations[stack_val] = ValueLoc::Stack(ss);
pos.func.dfg.inst_variable_args_mut(inst)[idx] = stack_val;
}
// We changed stuff.
true
}

View File

@@ -0,0 +1,54 @@
//! Legalization of calls.
//!
//! This module exports the `expand_call` function which transforms a `call`
//! instruction into `func_addr` and `call_indirect` instructions.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `call` instruction. This lowers it to a `call_indirect`, which
/// is only done if the ABI doesn't support direct calls.
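///
/// A sketch of the transformation (hypothetical value and entity numbers):
///
/// ```clif
/// v2 = call fn0(v0, v1)
/// ```
///
/// becomes:
///
/// ```clif
/// v3 = func_addr.i64 fn0
/// v2 = call_indirect sig0, v3(v0, v1)
/// ```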
pub fn expand_call(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
// Unpack the instruction.
let (func_ref, old_args) = match func.dfg[inst] {
ir::InstructionData::Call {
opcode,
ref args,
func_ref,
} => {
debug_assert_eq!(opcode, ir::Opcode::Call);
(func_ref, args.clone())
}
_ => panic!("Wanted call: {}", func.dfg.display_inst(inst, None)),
};
let ptr_ty = isa.pointer_type();
let sig = func.dfg.ext_funcs[func_ref].signature;
let callee = {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.ins().func_addr(ptr_ty, func_ref)
};
let mut new_args = ir::ValueList::default();
new_args.push(callee, &mut func.dfg.value_lists);
for i in 0..old_args.len(&func.dfg.value_lists) {
new_args.push(
old_args.as_slice(&func.dfg.value_lists)[i],
&mut func.dfg.value_lists,
);
}
func.dfg
.replace(inst)
.CallIndirect(ir::Opcode::CallIndirect, ptr_ty, sig, new_args);
}

View File

@@ -0,0 +1,129 @@
//! Legalization of global values.
//!
//! This module exports the `expand_global_value` function which transforms a `global_value`
//! instruction into code that depends on the kind of global value referenced.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `global_value` instruction according to the definition of the global value.
pub fn expand_global_value(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
// Unpack the instruction.
let gv = match func.dfg[inst] {
ir::InstructionData::UnaryGlobalValue {
opcode,
global_value,
} => {
debug_assert_eq!(opcode, ir::Opcode::GlobalValue);
global_value
}
_ => panic!("Wanted global_value: {}", func.dfg.display_inst(inst, None)),
};
match func.global_values[gv] {
ir::GlobalValueData::VMContext => vmctx_addr(inst, func),
ir::GlobalValueData::IAddImm {
base,
offset,
global_type,
} => iadd_imm_addr(inst, func, base, offset.into(), global_type),
ir::GlobalValueData::Load {
base,
offset,
global_type,
readonly,
} => load_addr(inst, func, base, offset, global_type, readonly, isa),
ir::GlobalValueData::Symbol { .. } => symbol(inst, func, gv, isa),
}
}
/// Expand a `global_value` instruction for a vmctx global.
fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function) {
// Get the value representing the `vmctx` argument.
let vmctx = func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("Missing vmctx parameter");
// Replace the `global_value` instruction's value with an alias to the vmctx arg.
let result = func.dfg.first_result(inst);
func.dfg.clear_results(inst);
func.dfg.change_to_alias(result, vmctx);
func.layout.remove_inst(inst);
}
/// Expand a `global_value` instruction for an iadd_imm global.
fn iadd_imm_addr(
inst: ir::Inst,
func: &mut ir::Function,
base: ir::GlobalValue,
offset: i64,
global_type: ir::Type,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
// Get the value for the lhs. For tidiness, expand VMContext here so that we avoid
// `vmctx_addr` which creates an otherwise unneeded value alias.
let lhs = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] {
pos.func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("Missing vmctx parameter")
} else {
pos.ins().global_value(global_type, base)
};
// Simply replace the `global_value` instruction with an `iadd_imm`, reusing the result value.
pos.func.dfg.replace(inst).iadd_imm(lhs, offset);
}
/// Expand a `global_value` instruction for a load global.
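///
/// A sketch of the expansion for a load-style global with offset 8 (hypothetical entities;
/// `gv1` is the base global):
///
/// ```clif
/// v1 = global_value.i64 gv1
/// v0 = load.i64 notrap aligned v1+8
/// ```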
fn load_addr(
inst: ir::Inst,
func: &mut ir::Function,
base: ir::GlobalValue,
offset: ir::immediates::Offset32,
global_type: ir::Type,
readonly: bool,
isa: &TargetIsa,
) {
// We need to load a pointer from the `base` global value, so insert a new `global_value`
// instruction. This depends on the iterative legalization loop. Note that the IR verifier
// detects any cycles in the `load` globals.
let ptr_ty = isa.pointer_type();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Get the value for the base. For tidiness, expand VMContext here so that we avoid
// `vmctx_addr` which creates an otherwise unneeded value alias.
let base_addr = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] {
pos.func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("Missing vmctx parameter")
} else {
pos.ins().global_value(ptr_ty, base)
};
// Global-value loads are always notrap and aligned. They may be readonly.
let mut mflags = ir::MemFlags::trusted();
if readonly {
mflags.set_readonly();
}
// Perform the load.
pos.func
.dfg
.replace(inst)
.load(global_type, mflags, base_addr, offset);
}
/// Expand a `global_value` instruction for a symbolic name global.
fn symbol(inst: ir::Inst, func: &mut ir::Function, gv: ir::GlobalValue, isa: &TargetIsa) {
let ptr_ty = isa.pointer_type();
func.dfg.replace(inst).symbol_value(ptr_ty, gv);
}

View File

@@ -0,0 +1,161 @@
//! Legalization of heaps.
//!
//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
//! instruction into code that depends on the kind of heap referenced.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `heap_addr` instruction according to the definition of the heap.
pub fn expand_heap_addr(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
// Unpack the instruction.
let (heap, offset, access_size) = match func.dfg[inst] {
ir::InstructionData::HeapAddr {
opcode,
heap,
arg,
imm,
} => {
debug_assert_eq!(opcode, ir::Opcode::HeapAddr);
(heap, arg, imm.into())
}
_ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),
};
match func.heaps[heap].style {
ir::HeapStyle::Dynamic { bound_gv } => {
dynamic_addr(inst, heap, offset, access_size, bound_gv, func)
}
ir::HeapStyle::Static { bound } => {
static_addr(inst, heap, offset, access_size, bound.into(), func, cfg)
}
}
}
/// Expand a `heap_addr` for a dynamic heap.
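///
/// A sketch of the expansion of `v1 = heap_addr.i64 heap0, v0, 1` (hypothetical entities;
/// `gv0` holds the bound and `gv1` the base):
///
/// ```clif
/// v2 = global_value.i32 gv0
/// v3 = icmp uge v0, v2
/// trapnz v3, heap_oob
/// v4 = uextend.i64 v0
/// v5 = global_value.i64 gv1
/// v1 = iadd v5, v4
/// ```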
fn dynamic_addr(
inst: ir::Inst,
heap: ir::Heap,
offset: ir::Value,
access_size: u32,
bound_gv: ir::GlobalValue,
func: &mut ir::Function,
) {
let access_size = u64::from(access_size);
let offset_ty = func.dfg.value_type(offset);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let min_size = func.heaps[heap].min_size.into();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start with the bounds check. Trap if `offset + access_size > bound`.
let bound = pos.ins().global_value(offset_ty, bound_gv);
let oob;
if access_size == 1 {
// `offset > bound - 1` is the same as `offset >= bound`.
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound);
} else if access_size <= min_size {
// We know that bound >= min_size, so here we can compare `offset > bound - access_size`
// without wrapping.
let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64));
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
} else {
// We need an overflow check for the adjusted offset.
let access_size_val = pos.ins().iconst(offset_ty, access_size as i64);
let (adj_offset, overflow) = pos.ins().iadd_cout(offset, access_size_val);
pos.ins().trapnz(overflow, ir::TrapCode::HeapOutOfBounds);
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, adj_offset, bound);
}
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
compute_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
}
/// Expand a `heap_addr` for a static heap.
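///
/// Here the bound is constant, so the check reduces to a compare against
/// `bound - access_size`. A sketch for `bound = 0x1_0000` and a 4-byte access (hypothetical
/// value numbers):
///
/// ```clif
/// v2 = icmp_imm ugt v0, 0xfffc
/// trapnz v2, heap_oob
/// ```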
fn static_addr(
inst: ir::Inst,
heap: ir::Heap,
offset: ir::Value,
access_size: u32,
bound: u64,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
) {
let access_size = u64::from(access_size);
let offset_ty = func.dfg.value_type(offset);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start with the bounds check. Trap if `offset + access_size > bound`.
if access_size > bound {
// This will simply always trap since `offset >= 0`.
pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
pos.func.dfg.replace(inst).iconst(addr_ty, 0);
// Split Ebb, as the trap is a terminator instruction.
let curr_ebb = pos.current_ebb().expect("Cursor is not in an ebb");
let new_ebb = pos.func.dfg.make_ebb();
pos.insert_ebb(new_ebb);
cfg.recompute_ebb(pos.func, curr_ebb);
cfg.recompute_ebb(pos.func, new_ebb);
return;
}
// Check `offset > limit` which is now known non-negative.
let limit = bound - access_size;
// We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
// more.
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
let oob = if limit & 1 == 1 {
// The out-of-bounds condition `offset > limit` is the same as `offset >= limit + 1`.
// Prefer testing `offset >= limit + 1` when limit is odd because an even number is
// likely to be a convenient constant on ARM and other RISC architectures.
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 + 1)
} else {
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64)
};
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
}
compute_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
}
/// Emit code for the base address computation of a `heap_addr` instruction.
fn compute_addr(
inst: ir::Inst,
heap: ir::Heap,
addr_ty: ir::Type,
mut offset: ir::Value,
offset_ty: ir::Type,
func: &mut ir::Function,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Convert `offset` to `addr_ty`.
if offset_ty != addr_ty {
offset = pos.ins().uextend(addr_ty, offset);
}
// Add the heap base address.
let base_gv = pos.func.heaps[heap].base;
let base = pos.ins().global_value(addr_ty, base_gv);
pos.func.dfg.replace(inst).iadd(base, offset);
}

View File

@@ -0,0 +1,31 @@
//! Expanding instructions as runtime library calls.
use crate::ir;
use crate::ir::{get_libcall_funcref, InstBuilder};
use crate::isa::TargetIsa;
use crate::legalizer::boundary::legalize_libcall_signature;
use std::vec::Vec;
/// Try to expand `inst` as a library call, returning true if successful.
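///
/// For example, `v1 = ceil.f64 v0` on an ISA with no native rounding instruction becomes a
/// call to the runtime's `CeilF64` function (a sketch; assumes the target leaves `ceil.f64`
/// unencodable, hypothetical entity numbers):
///
/// ```clif
/// v1 = call fn0(v0)
/// ```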
pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &TargetIsa) -> bool {
// Does the opcode/ctrl_type combo even have a well-known runtime library name?
let libcall = match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst))
{
Some(lc) => lc,
None => return false,
};
// Now we convert `inst` to a call. First save the arguments.
let mut args = Vec::new();
args.extend_from_slice(func.dfg.inst_args(inst));
// The replace builder will preserve the instruction result values.
let funcref = get_libcall_funcref(libcall, func, inst, isa);
func.dfg.replace(inst).call(funcref, &args);
// Ask the ISA to legalize the signature.
let fn_data = &func.dfg.ext_funcs[funcref];
let sig_data = &mut func.dfg.signatures[fn_data.signature];
legalize_libcall_signature(sig_data, isa);
true
}

View File

@@ -0,0 +1,440 @@
//! Legalize instructions.
//!
//! A legal instruction is one that can be mapped directly to a machine code instruction for the
//! target ISA. The `legalize_function()` function takes as input any function and transforms it
//! into an equivalent function using only legal instructions.
//!
//! The characteristics of legal instructions depend on the target ISA, so any given instruction
//! can be legal for one ISA and illegal for another.
//!
//! Besides transforming instructions, the legalizer also fills out the `function.encodings` map
//! which provides a legal encoding recipe for every instruction.
//!
//! The legalizer does not deal with register allocation constraints. These constraints are derived
//! from the encoding recipes, and solved later by the register allocator.
use crate::bitset::BitSet;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::types::I32;
use crate::ir::{self, InstBuilder, MemFlags};
use crate::isa::TargetIsa;
use crate::timing;
mod boundary;
mod call;
mod globalvalue;
mod heap;
mod libcall;
mod split;
mod table;
use self::call::expand_call;
use self::globalvalue::expand_global_value;
use self::heap::expand_heap_addr;
use self::libcall::expand_as_libcall;
use self::table::expand_table_addr;
/// Legalize `inst` for `isa`. Return true if any changes to the code were
/// made; return false if the instruction was successfully encoded as is.
fn legalize_inst(
inst: ir::Inst,
pos: &mut FuncCursor,
cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) -> bool {
let opcode = pos.func.dfg[inst].opcode();
// Check for ABI boundaries that need to be converted to the legalized signature.
if opcode.is_call() {
if boundary::handle_call_abi(inst, pos.func, cfg) {
return true;
}
} else if opcode.is_return() {
if boundary::handle_return_abi(inst, pos.func, cfg) {
return true;
}
} else if opcode.is_branch() {
split::simplify_branch_arguments(&mut pos.func.dfg, inst);
}
match pos.func.update_encoding(inst, isa) {
Ok(()) => false,
Err(action) => {
// We should transform the instruction into legal equivalents.
// If the current instruction was replaced, we need to double back and revisit
// the expanded sequence. This is both to assign encodings and possibly to
// expand further.
// There's a risk of infinite looping here if the legalization patterns are
// unsound. Should we attempt to detect that?
if action(inst, pos.func, cfg, isa) {
return true;
}
// We don't have any pattern expansion for this instruction either.
// Try converting it to a library call as a last resort.
expand_as_libcall(inst, pos.func, isa)
}
}
}
/// Legalize `func` for `isa`.
///
/// - Transform any instructions that don't have a legal representation in `isa`.
/// - Fill out `func.encodings`.
///
pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &TargetIsa) {
let _tt = timing::legalize();
debug_assert!(cfg.is_valid());
boundary::legalize_signatures(func, isa);
func.encodings.resize(func.dfg.num_insts());
let mut pos = FuncCursor::new(func);
// Process EBBs in layout order. Some legalization actions may split the current EBB or append
// new ones to the end. We need to make sure we visit those new EBBs too.
while let Some(_ebb) = pos.next_ebb() {
// Keep track of the cursor position before the instruction being processed, so we can
// double back when replacing instructions.
let mut prev_pos = pos.position();
while let Some(inst) = pos.next_inst() {
if legalize_inst(inst, &mut pos, cfg, isa) {
// Go back and legalize the inserted return value conversion instructions.
pos.set_position(prev_pos);
} else {
// Remember this position in case we need to double back.
prev_pos = pos.position();
}
}
}
// Now that we've lowered all br_tables, we don't need the jump tables anymore.
if !isa.flags().jump_tables_enabled() {
pos.func.jump_tables.clear();
}
}
// Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in
// `cranelift-codegen/meta-python/base/legalize.py`.
//
// Concretely, this defines private functions `narrow()` and `expand()`.
include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));
/// Custom expansion for conditional trap instructions.
/// TODO: Add CFG support to the Python patterns so we won't have to do this.
fn expand_cond_trap(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
// Parse the instruction.
let trapz;
let (arg, code) = match func.dfg[inst] {
ir::InstructionData::CondTrap { opcode, arg, code } => {
// We want to branch *over* an unconditional trap.
trapz = match opcode {
ir::Opcode::Trapz => true,
ir::Opcode::Trapnz => false,
_ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
};
(arg, code)
}
_ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
};
// Split the EBB after `inst`:
//
// trapnz arg
//
// Becomes:
//
// brz arg, new_ebb
// trap
// new_ebb:
//
let old_ebb = func.layout.pp_ebb(inst);
let new_ebb = func.dfg.make_ebb();
if trapz {
func.dfg.replace(inst).brnz(arg, new_ebb, &[]);
} else {
func.dfg.replace(inst).brz(arg, new_ebb, &[]);
}
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
pos.ins().trap(code);
pos.insert_ebb(new_ebb);
// Finally update the CFG.
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, new_ebb);
}
/// Jump tables.
fn expand_br_table(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
if isa.flags().jump_tables_enabled() {
expand_br_table_jt(inst, func, cfg, isa);
} else {
expand_br_table_conds(inst, func, cfg, isa);
}
}
/// Expand br_table to jump table.
fn expand_br_table_jt(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
use crate::ir::condcodes::IntCC;
let (arg, default_ebb, table) = match func.dfg[inst] {
ir::InstructionData::BranchTable {
opcode: ir::Opcode::BrTable,
arg,
destination,
table,
} => (arg, destination, table),
_ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)),
};
let table_size = func.jump_tables[table].len();
let addr_ty = isa.pointer_type();
let entry_ty = I32;
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Bounds check
let oob = pos
.ins()
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, arg, table_size as i64);
pos.ins().brnz(oob, default_ebb, &[]);
let base_addr = pos.ins().jump_table_base(addr_ty, table);
let entry = pos
.ins()
.jump_table_entry(addr_ty, arg, base_addr, entry_ty.bytes() as u8, table);
let addr = pos.ins().iadd(base_addr, entry);
pos.ins().indirect_jump_table_br(addr, table);
let ebb = pos.current_ebb().unwrap();
pos.remove_inst();
cfg.recompute_ebb(pos.func, ebb);
}
/// Expand br_table to series of conditionals.
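///
/// A sketch for a two-entry jump table with default destination `ebb3` (hypothetical
/// entities):
///
/// ```clif
/// v1 = icmp_imm eq v0, 0
/// brnz v1, ebb1
/// v2 = icmp_imm eq v0, 1
/// brnz v2, ebb2
/// jump ebb3
/// ```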
fn expand_br_table_conds(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
use crate::ir::condcodes::IntCC;
let (arg, default_ebb, table) = match func.dfg[inst] {
ir::InstructionData::BranchTable {
opcode: ir::Opcode::BrTable,
arg,
destination,
table,
} => (arg, destination, table),
_ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)),
};
// This is a poor man's jump table using just a sequence of conditional branches.
let table_size = func.jump_tables[table].len();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
for i in 0..table_size {
let dest = pos.func.jump_tables[table].as_slice()[i];
let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64);
pos.ins().brnz(t, dest, &[]);
}
// `br_table` jumps to the default destination if nothing matches
pos.ins().jump(default_ebb, &[]);
let ebb = pos.current_ebb().unwrap();
pos.remove_inst();
cfg.recompute_ebb(pos.func, ebb);
}
/// Expand the select instruction.
///
/// Conditional moves are available in some ISAs for some register classes. The remaining selects
/// are handled by a branch.
fn expand_select(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
let (ctrl, tval, fval) = match func.dfg[inst] {
ir::InstructionData::Ternary {
opcode: ir::Opcode::Select,
args,
} => (args[0], args[1], args[2]),
_ => panic!("Expected select: {}", func.dfg.display_inst(inst, None)),
};
// Replace `result = select ctrl, tval, fval` with:
//
// brnz ctrl, new_ebb(tval)
// jump new_ebb(fval)
// new_ebb(result):
let old_ebb = func.layout.pp_ebb(inst);
let result = func.dfg.first_result(inst);
func.dfg.clear_results(inst);
let new_ebb = func.dfg.make_ebb();
func.dfg.attach_ebb_param(new_ebb, result);
func.dfg.replace(inst).brnz(ctrl, new_ebb, &[tval]);
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
pos.ins().jump(new_ebb, &[fval]);
pos.insert_ebb(new_ebb);
cfg.recompute_ebb(pos.func, new_ebb);
cfg.recompute_ebb(pos.func, old_ebb);
}
fn expand_br_icmp(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
let (cond, a, b, destination, ebb_args) = match func.dfg[inst] {
ir::InstructionData::BranchIcmp {
cond,
destination,
ref args,
..
} => (
cond,
args.get(0, &func.dfg.value_lists).unwrap(),
args.get(1, &func.dfg.value_lists).unwrap(),
destination,
args.as_slice(&func.dfg.value_lists)[2..].to_vec(),
),
_ => panic!("Expected br_icmp {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
func.dfg.clear_results(inst);
let icmp_res = func.dfg.replace(inst).icmp(cond, a, b);
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
pos.ins().brnz(icmp_res, destination, &ebb_args);
cfg.recompute_ebb(pos.func, destination);
cfg.recompute_ebb(pos.func, old_ebb);
}
/// Expand illegal `f32const` and `f64const` instructions.
fn expand_fconst(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
let ty = func.dfg.value_type(func.dfg.first_result(inst));
debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);
// In the future, we may want to generate constant pool entries for these constants, but for
// now use an `iconst` and a bit cast.
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let ival = match pos.func.dfg[inst] {
ir::InstructionData::UnaryIeee32 {
opcode: ir::Opcode::F32const,
imm,
} => pos.ins().iconst(ir::types::I32, i64::from(imm.bits())),
ir::InstructionData::UnaryIeee64 {
opcode: ir::Opcode::F64const,
imm,
} => pos.ins().iconst(ir::types::I64, imm.bits() as i64),
_ => panic!("Expected fconst: {}", pos.func.dfg.display_inst(inst, None)),
};
pos.func.dfg.replace(inst).bitcast(ty, ival);
}
/// Expand illegal `stack_load` instructions.
fn expand_stack_load(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
let ty = func.dfg.value_type(func.dfg.first_result(inst));
let addr_ty = isa.pointer_type();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let (stack_slot, offset) = match pos.func.dfg[inst] {
ir::InstructionData::StackLoad {
opcode: _opcode,
stack_slot,
offset,
} => (stack_slot, offset),
_ => panic!(
"Expected stack_load: {}",
pos.func.dfg.display_inst(inst, None)
),
};
let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset);
// Stack slots are required to be accessible and aligned.
let mflags = MemFlags::trusted();
pos.func.dfg.replace(inst).load(ty, mflags, addr, 0);
}
/// Expand illegal `stack_store` instructions.
fn expand_stack_store(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
let addr_ty = isa.pointer_type();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let (val, stack_slot, offset) = match pos.func.dfg[inst] {
ir::InstructionData::StackStore {
opcode: _opcode,
arg,
stack_slot,
offset,
} => (arg, stack_slot, offset),
_ => panic!(
"Expected stack_store: {}",
pos.func.dfg.display_inst(inst, None)
),
};
let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset);
let mut mflags = MemFlags::new();
// Stack slots are required to be accessible and aligned.
mflags.set_notrap();
mflags.set_aligned();
pos.func.dfg.replace(inst).store(mflags, val, addr, 0);
}

View File

@@ -0,0 +1,345 @@
//! Value splitting.
//!
//! Some value types are too large to fit in registers, so they need to be split into smaller parts
//! that the ISA can operate on. There are two dimensions of splitting, represented by two
//! complementary instruction pairs:
//!
//! - `isplit` and `iconcat` for splitting integer types into smaller integers.
//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same
//! lane types.
//!
//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably
//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
//! This breakdown is handled by the ABI lowering.
//!
//! When legalizing a single instruction, it is wrapped in splits and concatenations:
//!
//! ```clif
//! v1 = bxor.i64 v2, v3
//! ```
//!
//! becomes:
//!
//! ```clif
//! v20, v21 = isplit v2
//! v30, v31 = isplit v3
//! v10 = bxor.i32 v20, v30
//! v11 = bxor.i32 v21, v31
//! v1 = iconcat v10, v11
//! ```
//!
//! This local expansion approach still leaves the original `i64` values in the code as operands on
//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
//! values are constantly split and concatenated.
//!
//! # Optimized splitting
//!
//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
//! first check if the value is defined by the corresponding concatenation. If so, then just use
//! the two concatenation inputs directly:
//!
//! ```clif
//! v4 = iadd_imm.i64 v1, 1
//! ```
//!
//! becomes, using the expanded code from above:
//!
//! ```clif
//! v40, v5 = iadd_imm_cout.i32 v10, 1
//! v6 = bint.i32 v5
//! v41 = iadd.i32 v11, v6
//! v4 = iconcat v40, v41
//! ```
//!
//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
//! can be trivially deleted by a dead code elimination pass.
//!
//! # EBB arguments
//!
//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
//! up with no `i64` values anywhere, except for EBB arguments. We can work around this by
//! iteratively splitting EBB arguments too. That should leave us with no illegal value types
//! anywhere.
//!
//! It is possible to have circular dependencies of EBB arguments that are never used by any real
//! instructions. These loops will remain in the program.
use crate::cursor::{Cursor, CursorPosition, FuncCursor};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::{self, Ebb, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
use core::iter;
use std::vec::Vec;
/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
/// if possible.
pub fn isplit(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
) -> (Value, Value) {
split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat)
}
/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
/// possible.
pub fn vsplit(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
) -> (Value, Value) {
split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat)
}
/// After splitting an EBB argument, we need to go back and fix up all of the predecessor
/// instructions. This is potentially a recursive operation, but we don't implement it recursively
/// since that could use up too much stack.
///
/// Instead, the repairs are deferred and placed on a work list in stack form.
struct Repair {
concat: Opcode,
// The argument type after splitting.
split_type: Type,
// The destination EBB whose arguments have been split.
ebb: Ebb,
// Number of the original EBB argument which has been replaced by the low part.
num: usize,
// Number of the new EBB argument which represents the high part after the split.
hi_num: usize,
}
/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode.
fn split_any(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
concat: Opcode,
) -> (Value, Value) {
let mut repairs = Vec::new();
let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc);
let result = split_value(pos, value, concat, &mut repairs);
// We have split the value requested, and now we may need to fix some EBB predecessors.
while let Some(repair) = repairs.pop() {
for BasicBlock { inst, .. } in cfg.pred_iter(repair.ebb) {
let branch_opc = pos.func.dfg[inst].opcode();
debug_assert!(
branch_opc.is_branch(),
"Predecessor not a branch: {}",
pos.func.dfg.display_inst(inst, None)
);
let num_fixed_args = branch_opc.constraints().num_fixed_value_arguments();
let mut args = pos.func.dfg[inst]
.take_value_list()
.expect("Branches must have value lists.");
let num_args = args.len(&pos.func.dfg.value_lists);
// Get the old value passed to the EBB argument we're repairing.
let old_arg = args
.get(num_fixed_args + repair.num, &pos.func.dfg.value_lists)
.expect("Too few branch arguments");
// It's possible that the CFG's predecessor list has duplicates. Detect them here.
if pos.func.dfg.value_type(old_arg) == repair.split_type {
pos.func.dfg[inst].put_value_list(args);
continue;
}
// Split the old argument, possibly causing more repairs to be scheduled.
pos.goto_inst(inst);
let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs);
// The `lo` part replaces the original argument.
*args
.get_mut(num_fixed_args + repair.num, &mut pos.func.dfg.value_lists)
.unwrap() = lo;
// The `hi` part goes at the end. Since multiple repairs may have been scheduled to the
// same EBB, there could be multiple arguments missing.
if num_args > num_fixed_args + repair.hi_num {
*args
.get_mut(
num_fixed_args + repair.hi_num,
&mut pos.func.dfg.value_lists,
)
.unwrap() = hi;
} else {
// We need to append one or more arguments. If we're adding more than one argument,
// there must be pending repairs on the stack that will fill in the correct values
// instead of `hi`.
args.extend(
iter::repeat(hi).take(1 + num_fixed_args + repair.hi_num - num_args),
&mut pos.func.dfg.value_lists,
);
}
// Put the value list back after manipulating it.
pos.func.dfg[inst].put_value_list(args);
}
}
result
}
/// Split a single value using the integer or vector semantics given by the `concat` opcode.
///
/// If the value is defined by a `concat` instruction, just reuse the operand values of that
/// instruction.
///
/// Return the two new values representing the parts of `value`.
fn split_value(
pos: &mut FuncCursor,
value: Value,
concat: Opcode,
repairs: &mut Vec<Repair>,
) -> (Value, Value) {
let value = pos.func.dfg.resolve_aliases(value);
let mut reuse = None;
match pos.func.dfg.value_def(value) {
ValueDef::Result(inst, num) => {
// This is an instruction result. See if the value was created by a `concat`
// instruction.
if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
debug_assert_eq!(num, 0);
if opcode == concat {
reuse = Some((args[0], args[1]));
}
}
}
ValueDef::Param(ebb, num) => {
// This is an EBB parameter. We can split the parameter value unless this is the entry
// block.
if pos.func.layout.entry_block() != Some(ebb) {
// We are going to replace the parameter at `num` with two new arguments.
// Determine the new value types.
let ty = pos.func.dfg.value_type(value);
let split_type = match concat {
Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"),
Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"),
_ => panic!("Unhandled concat opcode: {}", concat),
};
// Since the `repairs` stack potentially contains other parameter numbers for
// `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other
// `repairs` entries.
//
// Replace the original `value` with the low part, and append the high part at the
// end of the argument list.
let lo = pos.func.dfg.replace_ebb_param(value, split_type);
let hi_num = pos.func.dfg.num_ebb_params(ebb);
let hi = pos.func.dfg.append_ebb_param(ebb, split_type);
reuse = Some((lo, hi));
// Now the original value is dangling. Insert a concatenation instruction that can
// compute it from the two new parameters. This also serves as a record of what we
// did so a future call to this function doesn't have to redo the work.
//
// Note that it is safe to move `pos` here since `reuse` was set above, so we don't
// need to insert a split instruction before returning.
pos.goto_first_inst(ebb);
pos.ins()
.with_result(value)
.Binary(concat, split_type, lo, hi);
// Finally, splitting the EBB parameter is not enough. We also have to repair all
// of the predecessor instructions that branch here.
add_repair(concat, split_type, ebb, num, hi_num, repairs);
}
}
}
// Did the code above succeed in finding values we can reuse?
if let Some(pair) = reuse {
pair
} else {
// No, we'll just have to insert the requested split instruction at `pos`. Note that `pos`
// has not been moved by the EBB argument code above when `reuse` is `None`.
match concat {
Opcode::Iconcat => pos.ins().isplit(value),
Opcode::Vconcat => pos.ins().vsplit(value),
_ => panic!("Unhandled concat opcode: {}", concat),
}
}
}
// Add a repair entry to the work list.
fn add_repair(
concat: Opcode,
split_type: Type,
ebb: Ebb,
num: usize,
hi_num: usize,
repairs: &mut Vec<Repair>,
) {
repairs.push(Repair {
concat,
split_type,
ebb,
num,
hi_num,
});
}
/// Strip concat-split chains. Return a simpler way of computing the same value.
///
/// Given this input:
///
/// ```clif
/// v10 = iconcat v1, v2
/// v11, v12 = isplit v10
/// ```
///
/// This function resolves `v11` to `v1` and `v12` to `v2`.
fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value {
let value = dfg.resolve_aliases(value);
// Deconstruct a split instruction.
let split_res;
let concat_opc;
let split_arg;
if let ValueDef::Result(inst, num) = dfg.value_def(value) {
split_res = num;
concat_opc = match dfg[inst].opcode() {
Opcode::Isplit => Opcode::Iconcat,
Opcode::Vsplit => Opcode::Vconcat,
_ => return value,
};
split_arg = dfg.inst_args(inst)[0];
} else {
return value;
}
// See if split_arg is defined by a concatenation instruction.
if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) {
if dfg[inst].opcode() == concat_opc {
return dfg.inst_args(inst)[split_res];
}
}
value
}
/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been
/// legalized.
///
/// The branch argument repairs performed by `split_any()` above may be performed on branches that
/// have not yet been legalized. The repaired arguments can be defined by actual split
/// instructions in that case.
///
/// After legalizing the instructions computing the value that was split, it is likely that we can
/// avoid depending on the split instruction. Its input probably comes from a concatenation.
pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) {
let mut new_args = Vec::new();
for &arg in dfg.inst_args(branch) {
let new_arg = resolve_splits(dfg, arg);
new_args.push(new_arg);
}
dfg.inst_args_mut(branch).copy_from_slice(&new_args);
}

View File

@@ -0,0 +1,113 @@
//! Legalization of tables.
//!
//! This module exports the `expand_table_addr` function which transforms a `table_addr`
//! instruction into code that depends on the kind of table referenced.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::immediates::Offset32;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `table_addr` instruction according to the definition of the table.
pub fn expand_table_addr(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
// Unpack the instruction.
let (table, index, element_offset) = match func.dfg[inst] {
ir::InstructionData::TableAddr {
opcode,
table,
arg,
offset,
} => {
debug_assert_eq!(opcode, ir::Opcode::TableAddr);
(table, arg, offset)
}
_ => panic!("Wanted table_addr: {}", func.dfg.display_inst(inst, None)),
};
dynamic_addr(inst, table, index, element_offset, func);
}
/// Expand a `table_addr` for a dynamic table.
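///
/// A sketch of the expansion of `v1 = table_addr.i64 table0, v0, +0` with 8-byte elements
/// (hypothetical entities; `gv0` holds the bound and `gv1` the base):
///
/// ```clif
/// v2 = global_value.i32 gv0
/// v3 = icmp uge v0, v2
/// trapnz v3, table_oob
/// v4 = uextend.i64 v0
/// v5 = global_value.i64 gv1
/// v6 = ishl_imm v4, 3
/// v1 = iadd v5, v6
/// ```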
fn dynamic_addr(
inst: ir::Inst,
table: ir::Table,
index: ir::Value,
element_offset: Offset32,
func: &mut ir::Function,
) {
let bound_gv = func.tables[table].bound_gv;
let index_ty = func.dfg.value_type(index);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start with the bounds check. Trap if `index + 1 > bound`.
let bound = pos.ins().global_value(index_ty, bound_gv);
// `index > bound - 1` is the same as `index >= bound`.
let oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, index, bound);
pos.ins().trapnz(oob, ir::TrapCode::TableOutOfBounds);
compute_addr(
inst,
table,
addr_ty,
index,
index_ty,
element_offset,
pos.func,
);
}
/// Emit code for the base address computation of a `table_addr` instruction.
fn compute_addr(
inst: ir::Inst,
table: ir::Table,
addr_ty: ir::Type,
mut index: ir::Value,
index_ty: ir::Type,
element_offset: Offset32,
func: &mut ir::Function,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Convert `index` to `addr_ty`.
if index_ty != addr_ty {
index = pos.ins().uextend(addr_ty, index);
}
// Add the table base address.
let base_gv = pos.func.tables[table].base_gv;
let base = pos.ins().global_value(addr_ty, base_gv);
let element_size = pos.func.tables[table].element_size;
let mut offset;
let element_size: u64 = element_size.into();
if element_size == 1 {
offset = index;
} else if element_size.is_power_of_two() {
offset = pos
.ins()
.ishl_imm(index, i64::from(element_size.trailing_zeros()));
} else {
offset = pos.ins().imul_imm(index, element_size as i64);
}
if element_offset == Offset32::new(0) {
pos.func.dfg.replace(inst).iadd(base, offset);
} else {
let imm: i64 = element_offset.into();
offset = pos.ins().iadd(base, offset);
pos.func.dfg.replace(inst).iadd_imm(offset, imm);
}
}

View File

@@ -0,0 +1,110 @@
//! Cranelift code generation library.
#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
#![warn(unused_import_braces)]
#![cfg_attr(feature = "std", deny(unstable_features))]
#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
#![cfg_attr(feature="cargo-clippy", allow(
// Produces only a false positive:
clippy::while_let_loop,
// Produces many false positives, but did produce some valid lints, now fixed:
clippy::needless_lifetimes,
// Generated code makes some style transgressions, but readability doesn't suffer much:
clippy::many_single_char_names,
clippy::identity_op,
clippy::needless_borrow,
clippy::cast_lossless,
clippy::unreadable_literal,
clippy::assign_op_pattern,
clippy::empty_line_after_outer_attr,
// Hard to avoid in generated code:
clippy::cyclomatic_complexity,
clippy::too_many_arguments,
// Code generator doesn't have a way to collapse identical arms:
clippy::match_same_arms,
// These are relatively minor style issues, but would be easy to fix:
clippy::new_without_default,
clippy::new_without_default_derive,
clippy::should_implement_trait,
clippy::len_without_is_empty))]
#![cfg_attr(
feature = "cargo-clippy",
warn(
clippy::float_arithmetic,
clippy::mut_mut,
clippy::nonminimal_bool,
clippy::option_map_unwrap_or,
clippy::option_map_unwrap_or_else,
clippy::print_stdout,
clippy::unicode_not_nfc,
clippy::use_self
)
)]
#![no_std]
#![cfg_attr(not(feature = "std"), feature(alloc))]
#[cfg(not(feature = "std"))]
#[macro_use]
extern crate alloc as std;
#[cfg(feature = "std")]
#[macro_use]
extern crate std;
#[cfg(not(feature = "std"))]
use hashmap_core::{map as hash_map, HashMap, HashSet};
#[cfg(feature = "std")]
use std::collections::{hash_map, HashMap, HashSet};
pub use crate::context::Context;
pub use crate::legalizer::legalize_function;
pub use crate::verifier::verify_function;
pub use crate::write::write_function;
pub use cranelift_bforest as bforest;
pub use cranelift_entity as entity;
pub mod binemit;
pub mod cfg_printer;
pub mod cursor;
pub mod dbg;
pub mod dominator_tree;
pub mod flowgraph;
pub mod ir;
pub mod isa;
pub mod loop_analysis;
pub mod print_errors;
pub mod settings;
pub mod timing;
pub mod verifier;
pub mod write;
pub use crate::entity::packed_option;
mod abi;
mod bitset;
mod constant_hash;
mod context;
mod dce;
mod divconst_magic_numbers;
mod fx;
mod iterators;
mod legalizer;
mod licm;
mod nan_canonicalization;
mod partition_slice;
mod postopt;
mod predicates;
mod ref_slice;
mod regalloc;
mod result;
mod scoped_hash_map;
mod simple_gvn;
mod simple_preopt;
mod stack_layout;
mod topo_order;
mod unreachable_code;
pub use crate::result::{CodegenError, CodegenResult};
/// Version number of this crate.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

View File

@@ -0,0 +1,239 @@
//! A Loop Invariant Code Motion optimization pass
use crate::cursor::{Cursor, EncCursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::fx::FxHashSet;
use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
use crate::isa::TargetIsa;
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::timing;
use std::vec::Vec;
/// Performs the LICM pass by detecting loops within the CFG and moving
/// loop-invariant instructions out of them.
/// Changes the CFG and domtree in-place during the operation.
pub fn do_licm(
isa: &TargetIsa,
func: &mut Function,
cfg: &mut ControlFlowGraph,
domtree: &mut DominatorTree,
loop_analysis: &mut LoopAnalysis,
) {
let _tt = timing::licm();
debug_assert!(cfg.is_valid());
debug_assert!(domtree.is_valid());
debug_assert!(loop_analysis.is_valid());
for lp in loop_analysis.loops() {
// For each loop that we want to optimize, we determine the set of loop-invariant
// instructions, removing them from the loop body as we go.
let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
// Then we create the loop's pre-header (if needed) and fill it with the invariant
// instructions.
if !invariant_insts.is_empty() {
// If the loop has a natural pre-header we use it; otherwise we create one.
let mut pos;
match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) {
None => {
let pre_header =
create_pre_header(isa, loop_analysis.loop_header(lp), func, cfg, domtree);
pos = FuncCursor::new(func).at_last_inst(pre_header);
}
// If there is a natural pre-header we insert new instructions just before the
// related jumping instruction (which is not necessarily at the end).
Some((_, last_inst)) => {
pos = FuncCursor::new(func).at_inst(last_inst);
}
};
// The last instruction of the pre-header is the termination instruction (usually
// a jump) so we need to insert just before this.
for inst in invariant_insts {
pos.insert_inst(inst);
}
}
}
// We have to recompute the domtree to account for the changes
cfg.compute(func);
domtree.compute(func, cfg);
}
// Insert a pre-header before the header, modifying the function layout and CFG to reflect it.
// A jump instruction to the header is placed at the end of the pre-header.
fn create_pre_header(
isa: &TargetIsa,
header: Ebb,
func: &mut Function,
cfg: &mut ControlFlowGraph,
domtree: &DominatorTree,
) -> Ebb {
let pool = &mut ListPool::<Value>::new();
let header_args_values: Vec<Value> = func.dfg.ebb_params(header).iter().cloned().collect();
let header_args_types: Vec<Type> = header_args_values
.iter()
.map(|&val| func.dfg.value_type(val))
.collect();
let pre_header = func.dfg.make_ebb();
let mut pre_header_args_value: EntityList<Value> = EntityList::new();
for typ in header_args_types {
pre_header_args_value.push(func.dfg.append_ebb_param(pre_header, typ), pool);
}
for BasicBlock {
inst: last_inst, ..
} in cfg.pred_iter(header)
{
// We only follow normal edges (not the back edges)
if !domtree.dominates(header, last_inst, &func.layout) {
change_branch_jump_destination(last_inst, pre_header, func);
}
}
{
let mut pos = EncCursor::new(func, isa).at_top(header);
// Inserts the pre-header at the right place in the layout.
pos.insert_ebb(pre_header);
pos.next_inst();
pos.ins().jump(header, pre_header_args_value.as_slice(pool));
}
pre_header
}
// Detects if a loop header has a natural pre-header.
//
// A loop header has a pre-header if there is only one predecessor that the header doesn't
// dominate.
// Returns the pre-header Ebb and the instruction jumping to the header.
fn has_pre_header(
layout: &Layout,
cfg: &ControlFlowGraph,
domtree: &DominatorTree,
header: Ebb,
) -> Option<(Ebb, Inst)> {
let mut result = None;
for BasicBlock {
ebb: pred_ebb,
inst: branch_inst,
} in cfg.pred_iter(header)
{
// We only count normal edges (not the back edges)
if !domtree.dominates(header, branch_inst, layout) {
if result.is_some() {
// We have already found one, so there is more than one.
return None;
}
if branch_inst != layout.last_inst(pred_ebb).unwrap()
|| cfg.succ_iter(pred_ebb).nth(1).is_some()
{
// It's along a critical edge, so don't use it.
return None;
}
result = Some((pred_ebb, branch_inst));
}
}
result
}
// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump
// or non-branch instruction.
fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
match func.dfg[inst].branch_destination_mut() {
None => (),
Some(instruction_dest) => *instruction_dest = new_ebb,
}
}
/// Test whether the given opcode is unsafe to even consider for LICM.
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
opcode.can_load()
|| opcode.can_store()
|| opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
|| opcode.is_return()
|| opcode.can_trap()
|| opcode.other_side_effects()
|| opcode.writes_cpu_flags()
}
/// Test whether the given instruction is loop-invariant.
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
if trivially_unsafe_for_licm(dfg[inst].opcode()) {
return false;
}
let inst_args = dfg.inst_args(inst);
for arg in inst_args {
let arg = dfg.resolve_aliases(*arg);
if loop_values.contains(&arg) {
return false;
}
}
true
}
// Traverses a loop in reverse post-order from a header EBB and identifies loop-invariant
// instructions. These loop-invariant instructions are then removed from the code and returned
// (in reverse post-order) for later use.
fn remove_loop_invariant_instructions(
lp: Loop,
func: &mut Function,
cfg: &ControlFlowGraph,
loop_analysis: &LoopAnalysis,
) -> Vec<Inst> {
let mut loop_values: FxHashSet<Value> = FxHashSet();
let mut invariant_insts: Vec<Inst> = Vec::new();
let mut pos = FuncCursor::new(func);
// We traverse the loop EBB in reverse post-order.
for ebb in postorder_ebbs_loop(loop_analysis, cfg, lp).iter().rev() {
// Arguments of the EBB are loop values
for val in pos.func.dfg.ebb_params(*ebb) {
loop_values.insert(*val);
}
pos.goto_top(*ebb);
#[cfg_attr(feature = "cargo-clippy", allow(clippy::block_in_if_condition_stmt))]
while let Some(inst) = pos.next_inst() {
if is_loop_invariant(inst, &pos.func.dfg, &loop_values) {
// If all of the instruction's arguments are defined outside the loop
// then this instruction is loop-invariant
invariant_insts.push(inst);
// We remove it from the loop
pos.remove_inst_and_step_back();
} else {
// If the instruction is not loop-invariant we push its results in the set of
// loop values
for out in pos.func.dfg.inst_results(inst) {
loop_values.insert(*out);
}
}
}
}
invariant_insts
}
/// Return the EBBs of a loop in post-order, starting from the loop header.
fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis, cfg: &ControlFlowGraph, lp: Loop) -> Vec<Ebb> {
let mut grey = FxHashSet();
let mut black = FxHashSet();
let mut stack = vec![loop_analysis.loop_header(lp)];
let mut postorder = Vec::new();
while !stack.is_empty() {
let node = stack.pop().unwrap();
if !grey.contains(&node) {
// This is a white node. Mark it as gray.
grey.insert(node);
stack.push(node);
// Get any children we've never seen before.
for child in cfg.succ_iter(node) {
if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) {
stack.push(child);
}
}
} else if !black.contains(&node) {
postorder.push(node);
black.insert(node);
}
}
postorder
}
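// For illustration, a hedged sketch (not verbatim pass output) of what the pass
// above does at the IR level. Given a loop whose body recomputes a value that
// only depends on values defined outside the loop:
//
//     ebb1(v1: i32):              ; loop header
//         v2 = iadd_imm v0, 16    ; invariant: v0 is defined outside the loop
//         ...
//         brnz v3, ebb1(v4)
//
// the invariant `iadd_imm` is moved into the pre-header, so the loop body no
// longer recomputes it on every iteration.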

View File

@@ -0,0 +1,349 @@
//! A loop analysis represented as mappings of loops to their header Ebb
//! and parent in the loop tree.
use crate::dominator_tree::DominatorTree;
use crate::entity::entity_impl;
use crate::entity::SecondaryMap;
use crate::entity::{Keys, PrimaryMap};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::{Ebb, Function, Layout};
use crate::packed_option::PackedOption;
use crate::timing;
use std::vec::Vec;
/// An opaque reference to a code loop.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Loop(u32);
entity_impl!(Loop, "loop");
/// Loop tree information for a single function.
///
/// Loops are referenced by the `Loop` object, and for each loop you can access its header EBB,
/// its eventual parent in the loop tree, and all the EBBs belonging to the loop.
pub struct LoopAnalysis {
loops: PrimaryMap<Loop, LoopData>,
ebb_loop_map: SecondaryMap<Ebb, PackedOption<Loop>>,
valid: bool,
}
struct LoopData {
header: Ebb,
parent: PackedOption<Loop>,
}
impl LoopData {
/// Creates a `LoopData` object with the loop header and its eventual parent in the loop tree.
pub fn new(header: Ebb, parent: Option<Loop>) -> Self {
Self {
header,
parent: parent.into(),
}
}
}
/// Methods for querying the loop analysis.
impl LoopAnalysis {
/// Allocate a new blank loop analysis struct. Use `compute` to compute the loop analysis for
/// a function.
pub fn new() -> Self {
Self {
valid: false,
loops: PrimaryMap::new(),
ebb_loop_map: SecondaryMap::new(),
}
}
/// Returns all the loops contained in a function.
pub fn loops(&self) -> Keys<Loop> {
self.loops.keys()
}
/// Returns the header EBB of a particular loop.
///
/// The characteristic property of a loop header block is that it dominates some of its
/// predecessors.
pub fn loop_header(&self, lp: Loop) -> Ebb {
self.loops[lp].header
}
/// Return the eventual parent of a loop in the loop tree.
pub fn loop_parent(&self, lp: Loop) -> Option<Loop> {
self.loops[lp].parent.expand()
}
/// Determine if an Ebb belongs to a loop by running a finger along the loop tree.
///
/// Returns `true` if `ebb` is in loop `lp`.
pub fn is_in_loop(&self, ebb: Ebb, lp: Loop) -> bool {
let ebb_loop = self.ebb_loop_map[ebb];
match ebb_loop.expand() {
None => false,
Some(ebb_loop) => self.is_child_loop(ebb_loop, lp),
}
}
/// Determines if a loop is contained in another loop.
///
/// `is_child_loop(child,parent)` returns `true` if and only if `child` is a child loop of
/// `parent` (or `child == parent`).
pub fn is_child_loop(&self, child: Loop, parent: Loop) -> bool {
let mut finger = Some(child);
while let Some(finger_loop) = finger {
if finger_loop == parent {
return true;
}
finger = self.loop_parent(finger_loop);
}
false
}
}
impl LoopAnalysis {
/// Detects the loops in a function. Needs the control flow graph and the dominator tree.
pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree) {
let _tt = timing::loop_analysis();
self.loops.clear();
self.ebb_loop_map.clear();
self.ebb_loop_map.resize(func.dfg.num_ebbs());
self.find_loop_headers(cfg, domtree, &func.layout);
self.discover_loop_blocks(cfg, domtree, &func.layout);
self.valid = true;
}
/// Check if the loop analysis is in a valid state.
///
/// Note that this doesn't perform any kind of validity checks. It simply checks if the
/// `compute()` method has been called since the last `clear()`. It does not check that the
/// loop analysis is consistent with the CFG.
pub fn is_valid(&self) -> bool {
self.valid
}
/// Clear all the data structures contained in the loop analysis. This will leave the
/// analysis in a similar state to a context returned by `new()`, except that allocated
/// memory will be retained.
pub fn clear(&mut self) {
self.loops.clear();
self.ebb_loop_map.clear();
self.valid = false;
}
// Traverses the CFG in reverse postorder and creates a loop object for every EBB that
// has a back edge.
fn find_loop_headers(
&mut self,
cfg: &ControlFlowGraph,
domtree: &DominatorTree,
layout: &Layout,
) {
// We traverse the CFG in reverse postorder
for &ebb in domtree.cfg_postorder().iter().rev() {
for BasicBlock {
inst: pred_inst, ..
} in cfg.pred_iter(ebb)
{
// If the ebb dominates one of its predecessors it is a back edge
if domtree.dominates(ebb, pred_inst, layout) {
// This ebb is a loop header, so we create its associated loop
let lp = self.loops.push(LoopData::new(ebb, None));
self.ebb_loop_map[ebb] = lp.into();
break;
// We break because we only need one back edge to identify a loop header.
}
}
}
}
// Intended to be called after `find_loop_headers`. For each detected loop header,
// discovers all the EBBs belonging to the loop and its inner loops. After a call to this
// function, the loop tree is fully constructed.
fn discover_loop_blocks(
&mut self,
cfg: &ControlFlowGraph,
domtree: &DominatorTree,
layout: &Layout,
) {
let mut stack: Vec<Ebb> = Vec::new();
// We handle each loop header in reverse order, corresponding to a pseudo postorder
// traversal of the graph.
for lp in self.loops().rev() {
for BasicBlock {
ebb: pred,
inst: pred_inst,
} in cfg.pred_iter(self.loops[lp].header)
{
// We follow the back edges
if domtree.dominates(self.loops[lp].header, pred_inst, layout) {
stack.push(pred);
}
}
while let Some(node) = stack.pop() {
let continue_dfs: Option<Ebb>;
match self.ebb_loop_map[node].expand() {
None => {
// The node hasn't been visited yet, so we tag it as part of the loop
self.ebb_loop_map[node] = PackedOption::from(lp);
continue_dfs = Some(node);
}
Some(node_loop) => {
// Make a mutable copy of `node_loop` that we update as we walk up the
// loop tree.
let mut node_loop = node_loop;
// The node is part of a loop, which can be lp or an inner loop
let mut node_loop_parent_option = self.loops[node_loop].parent;
while let Some(node_loop_parent) = node_loop_parent_option.expand() {
if node_loop_parent == lp {
// We have encountered lp so we stop (already visited)
break;
} else {
// Keep walking up the loop tree.
node_loop = node_loop_parent;
// Look up the parent loop.
node_loop_parent_option = self.loops[node_loop].parent;
}
}
// Now node_loop_parent_option is either:
// - None, and node_loop is a new inner loop of lp;
// - Some(...), and the initial node_loop was already a known inner loop of lp.
match node_loop_parent_option.expand() {
Some(_) => continue_dfs = None,
None => {
if node_loop != lp {
self.loops[node_loop].parent = lp.into();
continue_dfs = Some(self.loops[node_loop].header)
} else {
// If lp is a one-block loop then we make sure we stop
continue_dfs = None
}
}
}
}
}
// Now we have handled the popped node and need to continue the DFS by adding the
// predecessors of that node
if let Some(continue_dfs) = continue_dfs {
for BasicBlock { ebb: pred, .. } in cfg.pred_iter(continue_dfs) {
stack.push(pred)
}
}
}
}
}
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{types, Function, InstBuilder};
use crate::loop_analysis::{Loop, LoopAnalysis};
use std::vec::Vec;
#[test]
fn nested_loops_detection() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let ebb3 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb0, types::I32);
{
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
cur.ins().jump(ebb1, &[]);
cur.insert_ebb(ebb1);
cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
cur.ins().brnz(cond, ebb1, &[]);
cur.ins().jump(ebb3, &[]);
cur.insert_ebb(ebb3);
cur.ins().brnz(cond, ebb0, &[]);
}
let mut loop_analysis = LoopAnalysis::new();
let mut cfg = ControlFlowGraph::new();
let mut domtree = DominatorTree::new();
cfg.compute(&func);
domtree.compute(&func, &cfg);
loop_analysis.compute(&func, &cfg, &domtree);
let loops = loop_analysis.loops().collect::<Vec<Loop>>();
assert_eq!(loops.len(), 2);
assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[0]), None);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb1, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb2, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb3, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
}
#[test]
fn complex_loop_detection() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let ebb3 = func.dfg.make_ebb();
let ebb4 = func.dfg.make_ebb();
let ebb5 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb0, types::I32);
{
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
cur.ins().brnz(cond, ebb1, &[]);
cur.ins().jump(ebb3, &[]);
cur.insert_ebb(ebb1);
cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
cur.ins().brnz(cond, ebb1, &[]);
cur.ins().jump(ebb5, &[]);
cur.insert_ebb(ebb3);
cur.ins().jump(ebb4, &[]);
cur.insert_ebb(ebb4);
cur.ins().brnz(cond, ebb3, &[]);
cur.ins().jump(ebb5, &[]);
cur.insert_ebb(ebb5);
cur.ins().brnz(cond, ebb0, &[]);
}
let mut loop_analysis = LoopAnalysis::new();
let mut cfg = ControlFlowGraph::new();
let mut domtree = DominatorTree::new();
cfg.compute(&func);
domtree.compute(&func, &cfg);
loop_analysis.compute(&func, &cfg, &domtree);
let loops = loop_analysis.loops().collect::<Vec<Loop>>();
assert_eq!(loops.len(), 3);
assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
assert_eq!(loop_analysis.loop_header(loops[2]), ebb3);
assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[0]), None);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb3, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(ebb4, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true);
}
}

View File

@@ -0,0 +1,85 @@
//! A NaN-canonicalizing rewriting pass. Patches floating-point arithmetic
//! instructions that may return a NaN result with a sequence of operations
//! that replaces nondeterministic NaNs with a single canonical NaN value.
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::condcodes::FloatCC;
use crate::ir::immediates::{Ieee32, Ieee64};
use crate::ir::types;
use crate::ir::types::Type;
use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
use crate::timing;
// Canonical 32-bit and 64-bit NaN values.
static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000;
static CANON_64BIT_NAN: u64 = 0b0111111111111000000000000000000000000000000000000000000000000000;
/// Perform the NaN canonicalization pass.
pub fn do_nan_canonicalization(func: &mut Function) {
let _tt = timing::canonicalize_nans();
let mut pos = FuncCursor::new(func);
while let Some(_ebb) = pos.next_ebb() {
while let Some(inst) = pos.next_inst() {
if is_fp_arith(&mut pos, inst) {
add_nan_canon_seq(&mut pos, inst);
}
}
}
}
/// Returns `true` if the instruction is a floating-point arithmetic
/// operation. This ignores operations like `fneg`, `fabs`, or
/// `fcopysign` that only operate on the sign bit of a floating-point value.
fn is_fp_arith(pos: &mut FuncCursor, inst: Inst) -> bool {
match pos.func.dfg[inst] {
InstructionData::Unary { opcode, .. } => {
opcode == Opcode::Ceil
|| opcode == Opcode::Floor
|| opcode == Opcode::Nearest
|| opcode == Opcode::Sqrt
|| opcode == Opcode::Trunc
}
InstructionData::Binary { opcode, .. } => {
opcode == Opcode::Fadd
|| opcode == Opcode::Fdiv
|| opcode == Opcode::Fmax
|| opcode == Opcode::Fmin
|| opcode == Opcode::Fmul
|| opcode == Opcode::Fsub
}
InstructionData::Ternary { opcode, .. } => opcode == Opcode::Fma,
_ => false,
}
}
/// Append a sequence of canonicalizing instructions after the given instruction.
fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
// Select the instruction result and its type. Replace the instruction
// result and step forward before inserting the canonicalization sequence.
let val = pos.func.dfg.first_result(inst);
let val_type = pos.func.dfg.value_type(val);
let new_res = pos.func.dfg.replace_result(val, val_type);
let _next_inst = pos.next_inst().expect("EBB missing terminator!");
// Insert a comparison instruction to check if `new_res` is NaN. Select
// the canonical NaN value if it is, and assign the result back to `val`.
let is_nan = pos.ins().fcmp(FloatCC::NotEqual, new_res, new_res);
let canon_nan = insert_nan_const(pos, val_type);
pos.ins()
.with_result(val)
.select(is_nan, canon_nan, new_res);
pos.prev_inst(); // Step backwards so the pass does not skip instructions.
}
/// Insert a canonical 32-bit or 64-bit NaN constant at the current position.
fn insert_nan_const(pos: &mut FuncCursor, nan_type: Type) -> Value {
match nan_type {
types::F32 => pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)),
types::F64 => pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)),
_ => {
// Panic if the type given was not an IEEE floating point type.
panic!("Could not canonicalize NaN: Unexpected result type found.");
}
}
}
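// For illustration, a minimal sketch in plain Rust of the semantics the
// inserted IR sequence implements for `f32`: `x != x` holds only for NaN
// (mirroring the `FloatCC::NotEqual` self-comparison above), and the select
// then substitutes the canonical quiet NaN.
#[allow(dead_code)]
fn canonicalize_nan_f32(x: f32) -> f32 {
    if x != x {
        f32::from_bits(CANON_32BIT_NAN)
    } else {
        x
    }
}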

View File

@@ -0,0 +1,97 @@
//! Rearrange the elements in a slice according to a predicate.
use core::mem;
/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede
/// the elements where `p(t)` is false.
///
/// The order of elements is not preserved, unless the slice is already partitioned.
///
/// Returns the number of elements where `p(t)` is true.
pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
where
F: FnMut(&T) -> bool,
{
// The iterator works like a deque which we can pop from both ends.
let mut i = s.iter_mut();
// Number of elements for which the predicate is known to be true.
let mut pos = 0;
loop {
// Find the first element for which the predicate fails.
let head = loop {
match i.next() {
Some(head) => {
if !p(&head) {
break head;
}
}
None => return pos,
}
pos += 1;
};
// Find the last element for which the predicate succeeds.
let tail = loop {
match i.next_back() {
Some(tail) => {
if p(&tail) {
break tail;
}
}
None => return pos,
}
};
// Swap the two elements into the right order.
mem::swap(head, tail);
pos += 1;
}
}
#[cfg(test)]
mod tests {
use super::partition_slice;
use std::vec::Vec;
fn check(x: &[u32], want: &[u32]) {
assert_eq!(x.len(), want.len());
let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count();
let mut v = Vec::new();
v.extend(x.iter().cloned());
let count = partition_slice(&mut v[..], |&x| x % 10 == 0);
assert_eq!(v, want);
assert_eq!(count, want_count);
}
#[test]
fn empty() {
check(&[], &[]);
}
#[test]
fn singles() {
check(&[0], &[0]);
check(&[1], &[1]);
check(&[10], &[10]);
}
#[test]
fn doubles() {
check(&[0, 0], &[0, 0]);
check(&[0, 5], &[0, 5]);
check(&[5, 0], &[0, 5]);
check(&[5, 4], &[5, 4]);
}
#[test]
fn longer() {
check(&[1, 2, 3], &[1, 2, 3]);
check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required.
check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required.
check(&[1, 20, 10], &[10, 20, 1]); // Note: 10, 20 order not required.
check(&[1, 20, 3, 10], &[10, 20, 3, 1]);
check(&[20, 3, 10, 1], &[20, 10, 3, 1]);
}
}

View File

@@ -0,0 +1,385 @@
//! A post-legalization rewriting pass.
#![allow(non_snake_case)]
use crate::cursor::{Cursor, EncCursor};
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
use crate::ir::dfg::ValueDef;
use crate::ir::immediates::{Imm64, Offset32};
use crate::ir::instructions::{Opcode, ValueList};
use crate::ir::{Ebb, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
use crate::isa::TargetIsa;
use crate::timing;
/// Information collected about a compare+branch sequence.
struct CmpBrInfo {
/// The branch instruction.
br_inst: Inst,
/// The icmp, icmp_imm, or fcmp instruction.
cmp_inst: Inst,
/// The destination of the branch.
destination: Ebb,
/// The arguments of the branch.
args: ValueList,
/// The first argument to the comparison. The second is in the `kind` field.
cmp_arg: Value,
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
/// before the branch.
invert_branch_cond: bool,
/// The kind of comparison, and the second argument.
kind: CmpBrKind,
}
enum CmpBrKind {
Icmp { cond: IntCC, arg: Value },
IcmpImm { cond: IntCC, imm: Imm64 },
Fcmp { cond: FloatCC, arg: Value },
}
/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
fn optimize_cpu_flags(
pos: &mut EncCursor,
inst: Inst,
last_flags_clobber: Option<Inst>,
isa: &TargetIsa,
) {
// Look for compare and branch patterns.
// This code could be considerably simplified with non-lexical lifetimes.
let info = match pos.func.dfg[inst] {
InstructionData::Branch {
opcode,
destination,
ref args,
} => {
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
let invert_branch_cond = match opcode {
Opcode::Brz => true,
Opcode::Brnz => false,
_ => panic!(),
};
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
match pos.func.dfg[cond_inst] {
InstructionData::IntCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Icmp {
cond,
arg: cmp_args[1],
},
},
InstructionData::IntCompareImm {
cond,
arg: cmp_arg,
imm: cmp_imm,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg,
invert_branch_cond,
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
},
InstructionData::FloatCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Fcmp {
cond,
arg: cmp_args[1],
},
},
_ => return,
}
} else {
return;
}
}
// TODO: trapif, trueif, selectif, and their ff counterparts.
_ => return,
};
// If any instruction clobbers the flags between the comparison and the branch,
// don't perform the optimization.
if last_flags_clobber != Some(info.cmp_inst) {
return;
}
// We found a compare+branch pattern. Transform it to use flags.
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
pos.goto_inst(info.cmp_inst);
match info.kind {
CmpBrKind::Icmp { mut cond, arg } => {
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::IcmpImm { mut cond, imm } => {
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::Fcmp { mut cond, arg } => {
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brff(cond, flags, info.destination, &args);
}
}
let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
debug_assert!(ok);
let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
debug_assert!(ok);
}
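// For illustration, a hedged sketch (not verbatim pass output) of the rewrite
// performed above for an integer compare-and-branch:
//
//     v2 = icmp slt v0, v1              v3 = ifcmp v0, v1
//     brnz v2, ebb1(...)      ==>       v2 = trueif slt v3
//                                       brif slt v3, ebb1(...)
//
// The branch now consumes the CPU flags directly, and `v2` is still computed
// via `trueif` in case the comparison result has other uses.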
struct MemOpInfo {
opcode: Opcode,
itype: Type,
arg: Value,
st_arg: Option<Value>,
flags: MemFlags,
offset: Offset32,
}
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &TargetIsa) {
// Look for simple loads and stores we can optimize.
let info = match pos.func.dfg[inst] {
InstructionData::Load {
opcode,
arg,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
arg,
st_arg: None,
flags,
offset,
},
InstructionData::Store {
opcode,
args,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
arg: args[1],
st_arg: Some(args[0]),
flags,
offset,
},
_ => return,
};
// Examine the instruction that defines the address operand.
if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
match pos.func.dfg[result_inst] {
InstructionData::Binary {
opcode: Opcode::Iadd,
args,
} => match info.opcode {
// Operand is an iadd. Fold it into a memory address with a complex address mode.
Opcode::Load => {
pos.func.dfg.replace(inst).load_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload8 => {
pos.func.dfg.replace(inst).uload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload8 => {
pos.func.dfg.replace(inst).sload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload16 => {
pos.func.dfg.replace(inst).uload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload16 => {
pos.func.dfg.replace(inst).sload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload32 => {
pos.func
.dfg
.replace(inst)
.uload32_complex(info.flags, &args, info.offset);
}
Opcode::Sload32 => {
pos.func
.dfg
.replace(inst)
.sload32_complex(info.flags, &args, info.offset);
}
Opcode::Store => {
pos.func.dfg.replace(inst).store_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore8 => {
pos.func.dfg.replace(inst).istore8_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore16 => {
pos.func.dfg.replace(inst).istore16_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore32 => {
pos.func.dfg.replace(inst).istore32_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
_ => panic!("Unsupported load or store opcode"),
},
InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg,
imm,
} => match pos.func.dfg[inst] {
// Operand is an iadd_imm. Fold the immediate into the offset if possible.
InstructionData::Load {
arg: ref mut load_arg,
ref mut offset,
..
} => {
if let Some(imm) = offset.try_add_i64(imm.into()) {
*load_arg = arg;
*offset = imm;
} else {
// Overflow.
return;
}
}
InstructionData::Store {
args: ref mut store_args,
ref mut offset,
..
} => {
if let Some(imm) = offset.try_add_i64(imm.into()) {
store_args[1] = arg;
*offset = imm;
} else {
// Overflow.
return;
}
}
_ => panic!(),
},
_ => {
// Address value is defined by some other kind of instruction.
return;
}
}
} else {
// Address value is not the result of an instruction.
return;
}
let ok = pos.func.update_encoding(inst, isa).is_ok();
debug_assert!(ok);
}
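// For illustration, a hedged sketch of the address folding above: a load whose
// address is produced by a plain `iadd`, such as
//
//     v2 = iadd v0, v1
//     v3 = load.i64 v2+8
//
// is rewritten to the complex-addressing form `load_complex`, folding the add
// into the memory operation; `iadd_imm` producers are instead folded into the
// load/store offset when the addition doesn't overflow.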
//----------------------------------------------------------------------
//
// The main post-opt pass.
pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
let _tt = timing::postopt();
let mut pos = EncCursor::new(func, isa);
while let Some(_ebb) = pos.next_ebb() {
let mut last_flags_clobber = None;
while let Some(inst) = pos.next_inst() {
if isa.uses_cpu_flags() {
// Optimize instructions to make use of flags.
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
// Track the most recently seen instruction that clobbers the flags.
if let Some(constraints) = isa
.encoding_info()
.operand_constraints(pos.func.encodings[inst])
{
if constraints.clobbers_flags {
last_flags_clobber = Some(inst)
}
}
}
if isa.uses_complex_addresses() {
optimize_complex_addresses(&mut pos, inst, isa);
}
}
}
}

View File

@@ -0,0 +1,106 @@
//! Predicate functions for testing instruction fields.
//!
//! This module defines functions that are used by the instruction predicates defined by
//! `cranelift-codegen/meta-python/cdsl/predicates.py` classes.
//!
//! The predicates that operate on integer fields use `Into<i64>` as a shared trait bound. This
//! bound is implemented by all the native integer types as well as `Imm64`.
//!
//! Some of these predicates may be unused in certain ISA configurations, so we suppress the
//! dead code warning.
use crate::ir;
/// Check that a 64-bit floating point value is zero.
#[allow(dead_code)]
pub fn is_zero_64_bit_float<T: Into<ir::immediates::Ieee64>>(x: T) -> bool {
let x64 = x.into();
x64.bits() == 0
}
/// Check that a 32-bit floating point value is zero.
#[allow(dead_code)]
pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
let x32 = x.into();
x32.bits() == 0
}
/// Check that `x` is the same as `y`.
#[allow(dead_code)]
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
x == y.into()
}
/// Check that `x` can be represented as a `wd`-bit signed integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_signed_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
let s = x.into();
// Shift out the low `sc` bits, then sign-extend from bit `wd - 1` by shifting
// up and arithmetically back down; `x` fits iff this round trip reproduces `s`.
s == (s >> sc << (64 - wd + sc) >> (64 - wd))
}
/// Check that `x` can be represented as a `wd`-bit unsigned integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_unsigned_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
let u = x.into() as u64;
// Bit-mask of the permitted bits.
let m = (1 << wd) - (1 << sc);
u == (u & m)
}
#[allow(dead_code)]
pub fn is_colocated_func(func_ref: ir::FuncRef, func: &ir::Function) -> bool {
func.dfg.ext_funcs[func_ref].colocated
}
#[allow(dead_code)]
pub fn is_colocated_data(global_value: ir::GlobalValue, func: &ir::Function) -> bool {
match func.global_values[global_value] {
ir::GlobalValueData::Symbol { colocated, .. } => colocated,
_ => panic!("is_colocated_data only makes sense for data with symbolic addresses"),
}
}
#[allow(dead_code)]
pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool {
value_list.len(&func.dfg.value_lists) == num
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn cvt_u32() {
let x1 = 0u32;
let x2 = 1u32;
let x3 = 0xffff_fff0u32;
assert!(is_signed_int(x1, 1, 0));
assert!(is_signed_int(x1, 2, 1));
assert!(is_signed_int(x2, 2, 0));
assert!(!is_signed_int(x2, 2, 1));
// `u32` doesn't sign-extend when converted to `i64`.
assert!(!is_signed_int(x3, 8, 0));
assert!(is_unsigned_int(x1, 1, 0));
assert!(is_unsigned_int(x1, 8, 4));
assert!(is_unsigned_int(x2, 1, 0));
assert!(!is_unsigned_int(x2, 8, 4));
assert!(!is_unsigned_int(x3, 1, 0));
assert!(is_unsigned_int(x3, 32, 4));
}
#[test]
fn cvt_imm64() {
use crate::ir::immediates::Imm64;
let x1 = Imm64::new(-8);
let x2 = Imm64::new(8);
assert!(is_signed_int(x1, 16, 2));
assert!(is_signed_int(x2, 16, 2));
assert!(!is_signed_int(x1, 16, 4));
assert!(!is_signed_int(x2, 16, 4));
}
}

View File

@@ -0,0 +1,227 @@
//! Utility routines for pretty-printing error messages.
use crate::entity::SecondaryMap;
use crate::ir;
use crate::ir::entities::{AnyEntity, Ebb, Inst, Value};
use crate::ir::function::Function;
use crate::isa::TargetIsa;
use crate::result::CodegenError;
use crate::verifier::{VerifierError, VerifierErrors};
use crate::write::{decorate_function, FuncWriter, PlainWriter};
use core::fmt;
use core::fmt::Write;
use std::boxed::Box;
use std::string::{String, ToString};
use std::vec::Vec;
/// Pretty-print a verifier error.
pub fn pretty_verifier_error<'a>(
func: &ir::Function,
isa: Option<&TargetIsa>,
func_w: Option<Box<FuncWriter + 'a>>,
errors: VerifierErrors,
) -> String {
let mut errors = errors.0;
let mut w = String::new();
let num_errors = errors.len();
decorate_function(
&mut PrettyVerifierError(func_w.unwrap_or_else(|| Box::new(PlainWriter)), &mut errors),
&mut w,
func,
isa,
)
.unwrap();
writeln!(
w,
"\n; {} verifier error{} detected (see above). Compilation aborted.",
num_errors,
if num_errors == 1 { "" } else { "s" }
)
.unwrap();
w
}
struct PrettyVerifierError<'a>(Box<FuncWriter + 'a>, &'a mut Vec<VerifierError>);
impl<'a> FuncWriter for PrettyVerifierError<'a> {
fn write_ebb_header(
&mut self,
w: &mut Write,
func: &Function,
isa: Option<&TargetIsa>,
ebb: Ebb,
indent: usize,
) -> fmt::Result {
pretty_ebb_header_error(w, func, isa, ebb, indent, &mut *self.0, self.1)
}
fn write_instruction(
&mut self,
w: &mut Write,
func: &Function,
aliases: &SecondaryMap<Value, Vec<Value>>,
isa: Option<&TargetIsa>,
inst: Inst,
indent: usize,
) -> fmt::Result {
pretty_instruction_error(w, func, aliases, isa, inst, indent, &mut *self.0, self.1)
}
fn write_entity_definition(
&mut self,
w: &mut Write,
func: &Function,
entity: AnyEntity,
value: &fmt::Display,
) -> fmt::Result {
pretty_preamble_error(w, func, entity, value, &mut *self.0, self.1)
}
}
/// Pretty-print a function verifier error for a given EBB.
fn pretty_ebb_header_error(
w: &mut Write,
func: &Function,
isa: Option<&TargetIsa>,
cur_ebb: Ebb,
indent: usize,
func_w: &mut FuncWriter,
errors: &mut Vec<VerifierError>,
) -> fmt::Result {
let mut s = String::new();
func_w.write_ebb_header(&mut s, func, isa, cur_ebb, indent)?;
write!(w, "{}", s)?;
// TODO: Use drain_filter here when it gets stabilized
let mut i = 0;
let mut printed_error = false;
while i != errors.len() {
match errors[i].location {
ir::entities::AnyEntity::Ebb(ebb) if ebb == cur_ebb => {
if !printed_error {
print_arrow(w, &s)?;
printed_error = true;
}
let err = errors.remove(i);
print_error(w, err)?;
}
_ => i += 1,
}
}
if printed_error {
w.write_char('\n')?;
}
Ok(())
}
/// Pretty-print a function verifier error for a given instruction.
fn pretty_instruction_error(
w: &mut Write,
func: &Function,
aliases: &SecondaryMap<Value, Vec<Value>>,
isa: Option<&TargetIsa>,
cur_inst: Inst,
indent: usize,
func_w: &mut FuncWriter,
errors: &mut Vec<VerifierError>,
) -> fmt::Result {
let mut s = String::new();
func_w.write_instruction(&mut s, func, aliases, isa, cur_inst, indent)?;
write!(w, "{}", s)?;
// TODO: Use drain_filter here when it gets stabilized
let mut i = 0;
let mut printed_error = false;
while i != errors.len() {
match errors[i].location {
ir::entities::AnyEntity::Inst(inst) if inst == cur_inst => {
if !printed_error {
print_arrow(w, &s)?;
printed_error = true;
}
let err = errors.remove(i);
print_error(w, err)?;
}
_ => i += 1,
}
}
if printed_error {
w.write_char('\n')?;
}
Ok(())
}
fn pretty_preamble_error(
w: &mut Write,
func: &Function,
entity: AnyEntity,
value: &fmt::Display,
func_w: &mut FuncWriter,
errors: &mut Vec<VerifierError>,
) -> fmt::Result {
let mut s = String::new();
func_w.write_entity_definition(&mut s, func, entity, value)?;
write!(w, "{}", s)?;
// TODO: Use drain_filter here when it gets stabilized
let mut i = 0;
let mut printed_error = false;
while i != errors.len() {
if entity == errors[i].location {
if !printed_error {
print_arrow(w, &s)?;
printed_error = true;
}
let err = errors.remove(i);
print_error(w, err)?;
} else {
i += 1
}
}
if printed_error {
w.write_char('\n')?;
}
Ok(())
}
/// Prints:
/// ; ^~~~~~
fn print_arrow(w: &mut Write, entity: &str) -> fmt::Result {
write!(w, ";")?;
let indent = entity.len() - entity.trim_start().len();
if indent != 0 {
write!(w, "{1:0$}^", indent - 1, "")?;
}
for _ in 0..entity.trim().len() - 1 {
write!(w, "~")?;
}
writeln!(w)
}
/// Prints:
/// ; error: [ERROR BODY]
fn print_error(w: &mut Write, err: VerifierError) -> fmt::Result {
writeln!(w, "; error: {}", err.to_string())?;
Ok(())
}
/// Pretty-print a Cranelift error.
pub fn pretty_error(func: &ir::Function, isa: Option<&TargetIsa>, err: CodegenError) -> String {
if let CodegenError::Verifier(e) = err {
pretty_verifier_error(func, isa, None, e)
} else {
err.to_string()
}
}
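// For illustration, a hedged sketch of the decorated output produced by the
// helpers above: the offending line is echoed, underlined by `print_arrow`,
// and followed by one `print_error` line per verifier error:
//
//     v1 = iadd v0, v0
// ;   ^~~~~~~~~~~~~~~~
// ; error: ...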

View File

@@ -0,0 +1,18 @@
//! Functions for converting a reference into a singleton slice.
//!
//! See also the [`ref_slice` crate](https://crates.io/crates/ref_slice).
//!
//! We define the functions here to avoid external dependencies, and to ensure that they are
//! inlined in this crate.
//!
//! Although they use an `unsafe` block, these functions are completely safe.
use core::slice;
pub fn ref_slice<T>(s: &T) -> &[T] {
unsafe { slice::from_raw_parts(s, 1) }
}
pub fn ref_slice_mut<T>(s: &mut T) -> &mut [T] {
unsafe { slice::from_raw_parts_mut(s, 1) }
}
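// A minimal usage sketch (hypothetical values): view a single value as a
// one-element slice, e.g. to feed an API that expects `&[T]`.
#[cfg(test)]
mod example {
    use super::*;

    #[test]
    fn singleton_view() {
        let x = 5;
        let s = ref_slice(&x);
        assert_eq!(s.len(), 1);
        assert_eq!(s[0], 5);
        let mut y = 7;
        ref_slice_mut(&mut y)[0] += 1;
        assert_eq!(y, 8);
    }
}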

View File

@@ -0,0 +1,128 @@
//! Value affinity for register allocation.
//!
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
//! instruction operand constraints.
//!
//! For values that want to be in registers, the affinity hint includes a register class or
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
//! larger register class instead.
use crate::ir::{AbiParam, ArgumentLoc};
use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
use core::fmt;
/// Preferred register allocation for an SSA value.
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
/// No affinity.
///
/// This indicates a value that is not defined or used by any real instructions. It is a ghost
/// value that won't appear in the final program.
Unassigned,
/// This value should be placed in a spill slot on the stack.
Stack,
/// This value prefers a register from the given register class.
Reg(RegClassIndex),
}
impl Default for Affinity {
fn default() -> Self {
Affinity::Unassigned
}
}
impl Affinity {
/// Create an affinity that satisfies a single constraint.
///
/// This will never create an `Affinity::Unassigned`.
/// Use the `Default` implementation for that.
pub fn new(constraint: &OperandConstraint) -> Self {
if constraint.kind == ConstraintKind::Stack {
Affinity::Stack
} else {
Affinity::Reg(constraint.regclass.into())
}
}
/// Create an affinity that matches an ABI argument for `isa`.
pub fn abi(arg: &AbiParam, isa: &TargetIsa) -> Self {
match arg.location {
ArgumentLoc::Unassigned => Affinity::Unassigned,
ArgumentLoc::Reg(_) => Affinity::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
ArgumentLoc::Stack(_) => Affinity::Stack,
}
}
/// Is this the `Unassigned` affinity?
pub fn is_unassigned(self) -> bool {
match self {
Affinity::Unassigned => true,
_ => false,
}
}
/// Is this the `Reg` affinity?
pub fn is_reg(self) -> bool {
match self {
Affinity::Reg(_) => true,
_ => false,
}
}
/// Is this the `Stack` affinity?
pub fn is_stack(self) -> bool {
match self {
Affinity::Stack => true,
_ => false,
}
}
/// Merge an operand constraint into this affinity.
///
/// Note that this does not guarantee that the register allocator will pick a register that
/// satisfies the constraint.
pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
match *self {
Affinity::Unassigned => *self = Self::new(constraint),
Affinity::Reg(rc) => {
// If the preferred register class is a subclass of the constraint, there's no need
// to change anything.
if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
{
// If the register classes don't overlap, `intersect_index` returns `None`, and
// we just keep our previous affinity.
if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
// This constraint shrinks our preferred register class.
*self = Affinity::Reg(subclass);
}
}
}
Affinity::Stack => {}
}
}
/// Return an object that can display this value affinity, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
DisplayAffinity(self, regs.into())
}
}
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAffinity<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Affinity::Unassigned => write!(f, "unassigned"),
Affinity::Stack => write!(f, "stack"),
Affinity::Reg(rci) => match self.1 {
Some(regs) => write!(f, "{}", regs.rc(rci)),
None => write!(f, "{}", rci),
},
}
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,217 @@
//! Register allocator context.
//!
//! The `Context` struct contains data structures that should be preserved across invocations of
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
//! avoids allocating data structures independently for each function being compiled.
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::coalescing::Coalescing;
use crate::regalloc::coloring::Coloring;
use crate::regalloc::live_value_tracker::LiveValueTracker;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::reload::Reload;
use crate::regalloc::spilling::Spilling;
use crate::regalloc::virtregs::VirtRegs;
use crate::result::CodegenResult;
use crate::timing;
use crate::topo_order::TopoOrder;
use crate::verifier::{
verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
};
/// Persistent memory allocations for register allocation.
pub struct Context {
liveness: Liveness,
virtregs: VirtRegs,
coalescing: Coalescing,
topo: TopoOrder,
tracker: LiveValueTracker,
spilling: Spilling,
reload: Reload,
coloring: Coloring,
}
impl Context {
/// Create a new context for register allocation.
///
/// This context should be reused for multiple functions in order to avoid repeated memory
/// allocations.
pub fn new() -> Self {
Self {
liveness: Liveness::new(),
virtregs: VirtRegs::new(),
coalescing: Coalescing::new(),
topo: TopoOrder::new(),
tracker: LiveValueTracker::new(),
spilling: Spilling::new(),
reload: Reload::new(),
coloring: Coloring::new(),
}
}
/// Clear all data structures in this context.
pub fn clear(&mut self) {
self.liveness.clear();
self.virtregs.clear();
self.coalescing.clear();
self.topo.clear();
self.tracker.clear();
self.spilling.clear();
self.reload.clear();
self.coloring.clear();
}
/// Allocate registers in `func`.
///
/// After register allocation, all values in `func` have been assigned to a register or stack
/// location that is consistent with instruction encoding constraints.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
cfg: &ControlFlowGraph,
domtree: &mut DominatorTree,
) -> CodegenResult<()> {
let _tt = timing::regalloc();
debug_assert!(domtree.is_valid());
let mut errors = VerifierErrors::default();
// `Liveness` and `Coloring` are self-clearing.
self.virtregs.clear();
// Tracker state (dominator live sets) is actually reused between the spilling and coloring
// phases.
self.tracker.clear();
// Pass: Liveness analysis.
self.liveness.compute(isa, func, cfg);
if isa.flags().enable_verifier() {
let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Coalesce and create Conventional SSA form.
self.coalescing.conventional_ssa(
isa,
func,
cfg,
domtree,
&mut self.liveness,
&mut self.virtregs,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Spilling.
self.spilling.run(
isa,
func,
domtree,
&mut self.liveness,
&self.virtregs,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Reload.
self.reload.run(
isa,
func,
domtree,
&mut self.liveness,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Coloring.
self.coloring
.run(isa, func, domtree, &mut self.liveness, &mut self.tracker);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_locations(isa, func, Some(&self.liveness), &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Even if we reach this point, (non-fatal) errors might have been reported, so we
// must make sure absolutely nothing is wrong.
if errors.is_empty() {
Ok(())
} else {
Err(errors.into())
}
}
}

View File

@@ -0,0 +1,218 @@
//! Register diversions.
//!
//! Normally, a value is assigned to a single register or stack location by the register allocator.
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
//! instruction constraints.
//!
//! These register diversions are local to an EBB. No values can be diverted when entering a new
//! EBB.
use crate::fx::FxHashMap;
use crate::hash_map::{Entry, Iter};
use crate::ir::{InstructionData, Opcode};
use crate::ir::{StackSlot, Value, ValueLoc, ValueLocations};
use crate::isa::{RegInfo, RegUnit};
use core::fmt;
/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Diversion {
/// The original value location.
pub from: ValueLoc,
/// The current value location.
pub to: ValueLoc,
}
impl Diversion {
/// Make a new diversion.
pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
debug_assert!(from.is_assigned() && to.is_assigned());
Self { from, to }
}
}
/// Keep track of diversions in an EBB.
pub struct RegDiversions {
current: FxHashMap<Value, Diversion>,
}
impl RegDiversions {
/// Create a new empty diversion tracker.
pub fn new() -> Self {
Self {
current: FxHashMap::default(),
}
}
/// Clear the tracker, preparing for a new EBB.
pub fn clear(&mut self) {
self.current.clear()
}
/// Are there any diversions?
pub fn is_empty(&self) -> bool {
self.current.is_empty()
}
/// Get the current diversion of `value`, if any.
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
self.current.get(&value)
}
/// Get all current diversions.
pub fn iter(&self) -> Iter<'_, Value, Diversion> {
self.current.iter()
}
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
/// values.
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
match self.diversion(value) {
Some(d) => d.to,
None => locations[value],
}
}
/// Get the current register location for `value`, or panic if `value` isn't in a register.
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
self.get(value, locations).unwrap_reg()
}
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
self.get(value, locations).unwrap_stack()
}
/// Record any kind of move.
///
/// The `from` location must match an existing `to` location, if any.
pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
debug_assert!(from.is_assigned() && to.is_assigned());
match self.current.entry(value) {
Entry::Occupied(mut e) => {
// TODO: non-lexical lifetimes should allow removal of the scope and early return.
{
let d = e.get_mut();
debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
if d.from != to {
d.to = to;
return;
}
}
e.remove();
}
Entry::Vacant(e) => {
e.insert(Diversion::new(from, to));
}
}
}
/// Record a register -> register move.
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
}
/// Record a register -> stack move.
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
}
/// Record a stack -> register move.
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
}
/// Apply the effect of `inst`.
///
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
/// match.
pub fn apply(&mut self, inst: &InstructionData) {
match *inst {
InstructionData::RegMove {
opcode: Opcode::Regmove,
arg,
src,
dst,
} => self.regmove(arg, src, dst),
InstructionData::RegSpill {
opcode: Opcode::Regspill,
arg,
src,
dst,
} => self.regspill(arg, src, dst),
InstructionData::RegFill {
opcode: Opcode::Regfill,
arg,
src,
dst,
} => self.regfill(arg, src, dst),
_ => {}
}
}
/// Drop any recorded move for `value`.
///
/// Returns the `to` location of the removed diversion.
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
self.current.remove(&value).map(|d| d.to)
}
/// Return an object that can display the diversions.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
DisplayDiversions(self, regs.into())
}
}
/// Object that displays register diversions.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayDiversions<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
for (value, div) in self.0.iter() {
write!(
f,
" {}: {} -> {}",
value,
div.from.display(self.1),
div.to.display(self.1)
)?
}
write!(f, " }}")
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::entity::EntityRef;
use crate::ir::Value;
#[test]
fn inserts() {
let mut divs = RegDiversions::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
divs.regmove(v1, 10, 12);
assert_eq!(
divs.diversion(v1),
Some(&Diversion {
from: ValueLoc::Reg(10),
to: ValueLoc::Reg(12),
})
);
assert_eq!(divs.diversion(v2), None);
divs.regmove(v1, 12, 11);
assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
divs.regmove(v1, 11, 10);
assert_eq!(divs.diversion(v1), None);
}
}

View File

@@ -0,0 +1,345 @@
//! Track which values are live in an EBB with instruction granularity.
//!
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in an EBB.
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
//! instruction, starting at the EBB header.
use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::fx::FxHashMap;
use crate::ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
use crate::partition_slice::partition_slice;
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::liverange::LiveRange;
use std::vec::Vec;
type ValueList = EntityList<Value>;
/// Compute and track live values throughout an EBB.
pub struct LiveValueTracker {
/// The set of values that are live at the current program point.
live: LiveValueVec,
/// Saved set of live values for every jump and branch that can potentially be an immediate
/// dominator of an EBB.
///
/// This is the set of values that are live *before* the branch.
idom_sets: FxHashMap<Inst, ValueList>,
/// Memory pool for the live sets.
idom_pool: ListPool<Value>,
}
/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
/// The live value.
pub value: Value,
/// The local ending point of the live range in the current EBB, as returned by
/// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
pub endpoint: Inst,
/// The affinity of the value as represented in its `LiveRange`.
///
/// This value is simply a copy of the affinity stored in the live range. We copy it because
/// almost all users of `LiveValue` need to look at it.
pub affinity: Affinity,
/// The live range for this value never leaves its EBB.
pub is_local: bool,
/// This value is dead - the live range ends immediately.
pub is_dead: bool,
}
struct LiveValueVec {
/// The set of values that are live at the current program point.
values: Vec<LiveValue>,
/// How many values at the front of `values` are known to be live after `inst`?
///
/// This is used to pass a much smaller slice to `partition_slice` when it's called a second
/// time for the same instruction.
live_prefix: Option<(Inst, usize)>,
}
impl LiveValueVec {
fn new() -> Self {
Self {
values: Vec::new(),
live_prefix: None,
}
}
/// Add a new live value to `values`. Copy some properties from `lr`.
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
self.values.push(LiveValue {
value,
endpoint,
affinity: lr.affinity,
is_local: lr.is_local(),
is_dead: lr.is_dead(),
});
}
/// Remove all elements.
fn clear(&mut self) {
self.values.clear();
self.live_prefix = None;
}
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
/// vector.
///
/// Returns the number of values that will be live after `next_inst`.
fn live_after(&mut self, next_inst: Inst) -> usize {
// How many values at the front of the vector are already known to survive `next_inst`?
// We don't need to pass this prefix to `partition_slice()`.
let keep = match self.live_prefix {
Some((i, prefix)) if i == next_inst => prefix,
_ => 0,
};
// Move the remaining surviving values to the front partition of the vector.
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
// Remember the new prefix length in case we get called again for the same `next_inst`.
self.live_prefix = Some((next_inst, prefix));
prefix
}
/// Remove the values killed by `next_inst`.
fn remove_kill_values(&mut self, next_inst: Inst) {
let keep = self.live_after(next_inst);
self.values.truncate(keep);
}
/// Remove any dead values.
fn remove_dead_values(&mut self) {
self.values.retain(|v| !v.is_dead);
self.live_prefix = None;
}
}
impl LiveValueTracker {
/// Create a new blank tracker.
pub fn new() -> Self {
Self {
live: LiveValueVec::new(),
idom_sets: FxHashMap(),
idom_pool: ListPool::new(),
}
}
/// Clear all cached information.
pub fn clear(&mut self) {
self.live.clear();
self.idom_sets.clear();
self.idom_pool.clear();
}
/// Get the set of currently live values.
///
/// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
/// defined by the current instruction.
pub fn live(&self) -> &[LiveValue] {
&self.live.values
}
/// Get a mutable set of currently live values.
///
/// Use with care and don't move entries around.
pub fn live_mut(&mut self) -> &mut [LiveValue] {
&mut self.live.values
}
/// Move the current position to the top of `ebb`.
///
/// This depends on the stored live value set at `ebb`'s immediate dominator, so that must have
/// been visited first.
///
/// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
/// from the immediate dominator. The second slice is the set of `ebb` parameters.
///
    /// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove them.
pub fn ebb_top(
&mut self,
ebb: Ebb,
dfg: &DataFlowGraph,
liveness: &Liveness,
layout: &Layout,
domtree: &DominatorTree,
) -> (&[LiveValue], &[LiveValue]) {
// Start over, compute the set of live values at the top of the EBB from two sources:
//
// 1. Values that were live before `ebb`'s immediate dominator, filtered for those that are
// actually live-in.
// 2. Arguments to `ebb` that are not dead.
//
self.live.clear();
// Compute the live-in values. Start by filtering the set of values that were live before
// the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
// the entry block or an unreachable block).
if let Some(idom) = domtree.idom(ebb) {
            // If the immediate dominator exists, we must have a stored list for it. This is a
            // requirement on the order in which EBBs are visited: all dominators must have been
            // processed before the current EBB.
let idom_live_list = self
.idom_sets
.get(&idom)
.expect("No stored live set for dominator");
let ctx = liveness.context(layout);
// Get just the values that are live-in to `ebb`.
for &value in idom_live_list.as_slice(&self.idom_pool) {
let lr = liveness
.get(value)
.expect("Immediate dominator value has no live range");
// Check if this value is live-in here.
if let Some(endpoint) = lr.livein_local_end(ebb, ctx) {
self.live.push(value, endpoint, lr);
}
}
}
// Now add all the live parameters to `ebb`.
let first_arg = self.live.values.len();
for &value in dfg.ebb_params(ebb) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), ebb.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(local_ebb) => {
// This is a dead EBB parameter which is not even live into the first
// instruction in the EBB.
debug_assert_eq!(
local_ebb, ebb,
"EBB parameter live range ends at wrong EBB header"
);
// Give this value a fake endpoint that is the first instruction in the EBB.
                    // We expect it to be removed by calling `drop_dead_params()`.
self.live
.push(value, layout.first_inst(ebb).expect("Empty EBB"), lr);
}
}
}
self.live.values.split_at(first_arg)
}
/// Prepare to move past `inst`.
///
/// Determine the set of already live values that are killed by `inst`, and add the new defined
/// values to the tracked set.
///
/// Returns `(throughs, kills, defs)` as a tuple of slices:
///
/// 1. The `throughs` slice is the set of live-through values that are neither defined nor
/// killed by the instruction.
/// 2. The `kills` slice is the set of values that were live before the instruction and are
/// killed at the instruction. This does not include dead defs.
/// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
/// dead defines.
///
/// The order of `throughs` and `kills` is arbitrary.
///
/// The `drop_dead()` method must be called next to actually remove the dead values from the
/// tracked set after the two returned slices are no longer needed.
pub fn process_inst(
&mut self,
inst: Inst,
dfg: &DataFlowGraph,
liveness: &Liveness,
) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
// Save a copy of the live values before any branches or jumps that could be somebody's
// immediate dominator.
if dfg[inst].opcode().is_branch() {
self.save_idom_live_set(inst);
}
// Move killed values to the end of the vector.
// Don't remove them yet, `drop_dead()` will do that.
let first_kill = self.live.live_after(inst);
// Add the values defined by `inst`.
let first_def = self.live.values.len();
for &value in dfg.inst_results(inst) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), inst.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(ebb) => {
panic!("Instruction result live range can't end at {}", ebb);
}
}
}
(
&self.live.values[0..first_kill],
&self.live.values[first_kill..first_def],
&self.live.values[first_def..],
)
}
/// Prepare to move past a ghost instruction.
///
/// This is like `process_inst`, except any defs are ignored.
///
/// Returns `(throughs, kills)`.
pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
let first_kill = self.live.live_after(inst);
self.live.values.as_slice().split_at(first_kill)
}
/// Drop the values that are now dead after moving past `inst`.
///
/// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
///
/// This must be called after `process_inst(inst)` and before proceeding to the next
/// instruction.
pub fn drop_dead(&mut self, inst: Inst) {
// Remove both live values that were killed by `inst` and dead defines from `inst`.
self.live.remove_kill_values(inst);
}
/// Drop any values that are marked as `is_dead`.
///
/// Use this after calling `ebb_top` to clean out dead EBB parameters.
pub fn drop_dead_params(&mut self) {
self.live.remove_dead_values();
}
/// Process new spills.
///
/// Any values where `f` returns true are spilled and will be treated as if their affinity was
/// `Stack`.
pub fn process_spills<F>(&mut self, mut f: F)
where
F: FnMut(Value) -> bool,
{
for lv in &mut self.live.values {
if f(lv.value) {
lv.affinity = Affinity::Stack;
}
}
}
/// Save the current set of live values so it is associated with `idom`.
fn save_idom_live_set(&mut self, idom: Inst) {
let values = self.live.values.iter().map(|lv| lv.value);
let pool = &mut self.idom_pool;
// If there already is a set saved for `idom`, just keep it.
self.idom_sets.entry(idom).or_insert_with(|| {
let mut list = ValueList::default();
list.extend(values, pool);
list
});
}
}

View File

@@ -0,0 +1,460 @@
//! Liveness analysis for SSA values.
//!
//! This module computes the live range of all the SSA values in a function and produces a
//! `LiveRange` instance for each.
//!
//!
//! # Liveness consumers
//!
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
//! EBB and assigns a register to the defined values. This algorithm needs to maintain a set of the
//! currently live values as it is iterating down the instructions in the EBB. It asks the
//! following questions:
//!
//! - What is the set of live values at the entry to the EBB?
//! - When moving past a use of a value, is that value still alive in the EBB, or was that the last
//! use?
//! - When moving past a branch, which of the live values are still live below the branch?
//!
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
//! `livein_local_end` queries. The coloring algorithm visits EBBs in a topological order of the
//! dominator tree, so it can compute the set of live values at the beginning of an EBB by starting
//! from the set of live values at the dominating branch instruction and filtering it with
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
//!
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
//! number of live values at every program point and insert spill code until the number of
//! registers needed is small enough.
//!
//!
//! # Alternative algorithms
//!
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
//! alternatives.
//!
//! ## Data-flow equations
//!
//! The classic *live variables analysis* that you will find in all compiler books from the
//! previous century does not depend on SSA form. It is typically implemented by iteratively
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
//! variables for every basic block in the program.
//!
//! This algorithm has some disadvantages that make us look elsewhere:
//!
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
//! their basic block, and those that do span basic blocks rarely span a large number of basic
//! blocks. This makes the data stored in the bitvectors quite sparse.
//! - Traditionally, the data-flow equations were solved for real program *variables*, which do
//!   not include temporaries used in evaluating expressions. We have an SSA form program which
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
//!   problem worse because there are many more SSA values than there were variables in the original
//! program, and we don't know a priori which SSA values leave their basic block.
//! - Missing last-use information. For values that are not live-out of a basic block, we would
//! need to store information about the last use in the block somewhere. LLVM stores this
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
//! source of problems for LLVM's register allocator.
//!
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
//! multiple definitions of the same variable. We don't need this generality since we already have
//! a program in SSA form.
//!
//! ## LLVM's liveness analysis
//!
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
//! a disjoint union of related SSA values that should be assigned to the same physical register.
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
//! describes the live range of a virtual register *and* which one of the related SSA values is
//! live at any given program point.
//!
//! LLVM computes the live range of each virtual register independently by using the use-def chains
//! that are baked into its IR. The algorithm for a single virtual register is:
//!
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
//! the def-chain. This does not include any phi-values.
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
//! that already contain some liveness and extend the last live SSA value in the block to be
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
//! PHI values to be created when different SSA values can reach the same block.
//!
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
//! one SSA value.
//!
//! This algorithm has some advantages compared to the data-flow equations:
//!
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
//! blocks in the program.
//! - The time to compute the live range of a global virtual register is proportional to the number
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
//! functions.
//! - A single live range can be recomputed after making modifications to the IR. No global
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
//! which Cranelift doesn't.
//!
//! Cranelift uses data structures and algorithms very similar to LLVM's, with the important
//! difference that live ranges are computed per SSA value instead of per virtual register, and
//! uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
//!
//! ## Fast Liveness Checking for SSA-Form Programs
//!
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
//! was presented at CGO 2008:
//!
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
//! Checking for SSA-Form Programs.* CGO.
//!
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
//! chain of the value and performs lookups in the precomputed bit-vectors.
//!
//! I did not seriously consider this analysis for Cranelift because:
//!
//! - It depends critically on use chains which Cranelift doesn't have.
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
//! quadratic behavior in unfortunate cases.
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
//! based approach, which isn't that impressive.
//!
//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
//! gains use chains, this approach would be worth a proper evaluation.
//!
//!
//! # Cranelift's liveness analysis
//!
//! The algorithm implemented in this module is similar to LLVM's with these differences:
//!
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
//! register.
//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
//! chains, so it is not possible to compute the live range for a single SSA value independently.
//!
//! The liveness computation visits all instructions in the program. The order is not important for
//! the algorithm to be correct. At each instruction, the used values are examined.
//!
//! - The first time a value is encountered, its live range is constructed as a dead live range
//! containing only the defining program point.
//! - The local interval of the value's live range is extended so it reaches the use. This may
//! require creating a new live-in local interval for the EBB.
//! - If the live range became live-in to the EBB, add the EBB to a work-list.
//! - While the work-list is non-empty pop a live-in EBB and repeat the two steps above, using each
//! of the live-in EBB's CFG predecessor instructions as a 'use'.
//!
//! The effect of this algorithm is to extend the live range of each value to reach uses as they are
//! visited. No data about each value beyond the live range is needed between visiting uses, so
//! nothing is lost by computing the live range of all values simultaneously.
//!
//! ## Cache efficiency of Cranelift vs LLVM
//!
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
//! somewhat chaotically.
//!
//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
//! instruction is brought into cache only once, and it is likely that the other instructions on
//! the same cache line will be visited before the line is evicted.
//!
//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
//! multiple related values can live on the same cache line.
//!
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
//! size to 32 bytes.
//! - Related values should be stored on the same cache line. The current sparse set implementation
//! does a decent job of that.
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
//! line. These lists are very likely to be found in L2 cache at least.
//!
//! There is some room for improvement.
use crate::entity::SparseMap;
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::dfg::ValueDef;
use crate::ir::{Ebb, Function, Inst, Layout, ProgramPoint, Value};
use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liverange::{LiveRange, LiveRangeContext, LiveRangeForest};
use crate::timing;
use core::mem;
use core::ops::Index;
use std::vec::Vec;
/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;
/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
fn get_or_create<'a>(
lrset: &'a mut LiveRangeSet,
value: Value,
isa: &TargetIsa,
func: &Function,
encinfo: &EncInfo,
) -> &'a mut LiveRange {
// It would be better to use `get_mut()` here, but that leads to borrow checker fighting
// which can probably only be resolved by non-lexical lifetimes.
// https://github.com/rust-lang/rfcs/issues/811
if lrset.get(value).is_none() {
// Create a live range for value. We need the program point that defines it.
let def;
let affinity;
match func.dfg.value_def(value) {
ValueDef::Result(inst, rnum) => {
def = inst.into();
// Initialize the affinity from the defining instruction's result constraints.
// Don't do this for call return values which are always tied to a single register.
affinity = encinfo
.operand_constraints(func.encodings[inst])
.and_then(|rc| rc.outs.get(rnum))
.map(Affinity::new)
.or_else(|| {
// If this is a call, get the return value affinity.
func.dfg
.call_signature(inst)
.map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
})
.unwrap_or_default();
}
ValueDef::Param(ebb, num) => {
def = ebb.into();
if func.layout.entry_block() == Some(ebb) {
// The affinity for entry block parameters can be inferred from the function
// signature.
affinity = Affinity::abi(&func.signature.params[num], isa);
} else {
// Give normal EBB parameters a register affinity matching their type.
let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
affinity = Affinity::Reg(rc.into());
}
}
};
lrset.insert(LiveRange::new(value, def, affinity));
}
lrset.get_mut(value).unwrap()
}
/// Extend the live range for `value` so it reaches `to` which must live in `ebb`.
fn extend_to_use(
lr: &mut LiveRange,
ebb: Ebb,
to: Inst,
worklist: &mut Vec<Ebb>,
func: &Function,
cfg: &ControlFlowGraph,
forest: &mut LiveRangeForest,
) {
// This is our scratch working space, and we'll leave it empty when we return.
debug_assert!(worklist.is_empty());
// Extend the range locally in `ebb`.
// If there already was a live interval in that block, we're done.
if lr.extend_in_ebb(ebb, to, &func.layout, forest) {
worklist.push(ebb);
}
// The work list contains those EBBs where we have learned that the value needs to be
// live-in.
//
// This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
// CFG from the existing live range to `ebb`.
//
// Extend the live range as we go. The live range itself also serves as a visited set since
// `extend_in_ebb` will never return true twice for the same EBB.
//
while let Some(livein) = worklist.pop() {
// We've learned that the value needs to be live-in to the `livein` EBB.
// Make sure it is also live at all predecessor branches to `livein`.
for BasicBlock {
ebb: pred,
inst: branch,
} in cfg.pred_iter(livein)
{
if lr.extend_in_ebb(pred, branch, &func.layout, forest) {
// This predecessor EBB also became live-in. We need to process it later.
worklist.push(pred);
}
}
}
}
/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
pub struct Liveness {
/// The live ranges that have been computed so far.
ranges: LiveRangeSet,
/// Memory pool for the live ranges.
forest: LiveRangeForest,
/// Working space for the `extend_to_use` algorithm.
/// This vector is always empty, except for inside that function.
/// It lives here to avoid repeated allocation of scratch memory.
worklist: Vec<Ebb>,
}
impl Liveness {
/// Create a new empty liveness analysis.
///
/// The memory allocated for this analysis can be reused for multiple functions. Use the
    /// `compute` method to actually run the analysis for a function.
pub fn new() -> Self {
Self {
ranges: LiveRangeSet::new(),
forest: LiveRangeForest::new(),
worklist: Vec::new(),
}
}
/// Get a context needed for working with a `LiveRange`.
pub fn context<'a>(&'a self, layout: &'a Layout) -> LiveRangeContext<'a, Layout> {
LiveRangeContext::new(layout, &self.forest)
}
/// Clear all data structures in this liveness analysis.
pub fn clear(&mut self) {
self.ranges.clear();
self.forest.clear();
self.worklist.clear();
}
/// Get the live range for `value`, if it exists.
pub fn get(&self, value: Value) -> Option<&LiveRange> {
self.ranges.get(value)
}
/// Create a new live range for `value`.
///
/// The new live range will be defined at `def` with no extent, like a dead value.
///
/// This asserts that `value` does not have an existing live range.
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
where
PP: Into<ProgramPoint>,
{
let old = self
.ranges
.insert(LiveRange::new(value, def.into(), affinity));
debug_assert!(old.is_none(), "{} already has a live range", value);
}
/// Move the definition of `value` to `def`.
///
/// The old and new def points must be in the same EBB, and before the end of the live range.
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
where
PP: Into<ProgramPoint>,
{
let lr = self.ranges.get_mut(value).expect("Value has no live range");
lr.move_def_locally(def.into());
}
/// Locally extend the live range for `value` to reach `user`.
///
    /// It is assumed that `value` is already live before `user` in `ebb`.
///
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
pub fn extend_locally(
&mut self,
value: Value,
ebb: Ebb,
user: Inst,
layout: &Layout,
) -> &mut Affinity {
debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
let lr = self.ranges.get_mut(value).expect("Value has no live range");
let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
debug_assert!(!livein, "{} should already be live in {}", value, ebb);
&mut lr.affinity
}
/// Change the affinity of `value` to `Stack` and return the previous affinity.
pub fn spill(&mut self, value: Value) -> Affinity {
let lr = self.ranges.get_mut(value).expect("Value has no live range");
mem::replace(&mut lr.affinity, Affinity::Stack)
}
/// Compute the live ranges of all SSA values used in `func`.
/// This clears out any existing analysis stored in this data structure.
pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
let _tt = timing::ra_liveness();
self.ranges.clear();
// Get ISA data structures used for computing live range affinities.
let encinfo = isa.encoding_info();
let reginfo = isa.register_info();
// The liveness computation needs to visit all uses, but the order doesn't matter.
// TODO: Perhaps this traversal of the function could be combined with a dead code
// elimination pass if we visit a post-order of the dominator tree?
// TODO: Resolve value aliases while we're visiting instructions?
for ebb in func.layout.ebbs() {
// Make sure we have created live ranges for dead EBB parameters.
// TODO: If these parameters are really dead, we could remove them, except for the
// entry block which must match the function signature.
for &arg in func.dfg.ebb_params(ebb) {
get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
}
for inst in func.layout.ebb_insts(ebb) {
                // Eliminate all value aliases; they would confuse the register allocator.
func.dfg.resolve_aliases_in_arguments(inst);
// Make sure we have created live ranges for dead defs.
// TODO: When we implement DCE, we can use the absence of a live range to indicate
// an unused value.
for &def in func.dfg.inst_results(inst) {
get_or_create(&mut self.ranges, def, isa, func, &encinfo);
}
// Iterator of constraints, one per value operand.
let encoding = func.encodings[inst];
let operand_constraint_slice: &[OperandConstraint] =
encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
let mut operand_constraints = operand_constraint_slice.iter();
for &arg in func.dfg.inst_args(inst) {
// Get the live range, create it as a dead range if necessary.
let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
// Extend the live range to reach this use.
extend_to_use(
lr,
ebb,
inst,
&mut self.worklist,
func,
cfg,
&mut self.forest,
);
// Apply operand constraint, ignoring any variable arguments after the fixed
// operands described by `operand_constraints`. Variable arguments are either
// EBB arguments or call/return ABI arguments.
if let Some(constraint) = operand_constraints.next() {
lr.affinity.merge(constraint, &reginfo);
}
}
}
}
}
}
impl Index<Value> for Liveness {
type Output = LiveRange;
fn index(&self, index: Value) -> &LiveRange {
match self.ranges.get(index) {
Some(lr) => lr,
None => panic!("{} has no live range", index),
}
}
}

View File

@@ -0,0 +1,745 @@
//! Data structure representing the live range of an SSA value.
//!
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
//! an SSA value begins where it is defined and extends to all program points where the value is
//! still needed.
//!
//! # Local Live Ranges
//!
//! Inside a single extended basic block, the live range of a value is always an interval between
//! two program points (if the value is live in the EBB at all). The starting point is either:
//!
//! 1. The instruction that defines the value, or
//! 2. The EBB header, because the value is an argument to the EBB, or
//! 3. The EBB header, because the value is defined in another EBB and live-in to this one.
//!
//! The ending point of the local live range is the last of the following program points in the
//! EBB:
//!
//! 1. The last use in the EBB, where a *use* is an instruction that has the value as an argument.
//! 2. The last branch or jump instruction in the EBB that can reach a use.
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
//!
//! Note that 2. includes loop back-edges to the same EBB. In general, if a value is defined
//! outside a loop and used inside the loop, it will be live in the entire loop.
//!
//! # Global Live Ranges
//!
//! Values that appear in more than one EBB have a *global live range* which can be seen as the
//! disjoint union of the per-EBB local intervals for all of the EBBs where the value is live.
//! Together with a `ProgramOrder` which provides a linear ordering of the EBBs, the global live
//! range becomes a linear sequence of disjoint intervals, at most one per EBB.
//!
//! In the special case of a dead value, the global live range is a single interval where the start
//! and end points are the same. The global live range of a value is never completely empty.
//!
//! # Register interference
//!
//! The register allocator uses live ranges to determine if values *interfere*, which means that
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
//! their intervals overlap.
//!
//! If one live range ends at an instruction that defines another live range, those two live ranges
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
//! register for an output value. If Cranelift gets support for inline assembly, we will need to
//! handle *early clobbers* which are output registers that are not allowed to alias any input
//! registers.
//!
//! If `i1 < i2 < i3` are program points, we have:
//!
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
//! - `i1-i2` and `i2-i3` don't interfere.
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
//! - `i1-i2` and `i2-i2` don't interfere.
//! - `i2-i3` and `i2-i2` do interfere.
//!
//! Because of this behavior around interval end points, live range interference is not completely
//! equivalent to mathematical intersection of open or half-open intervals.
//!
//! # Implementation notes
//!
//! A few notes about the implementation of this data structure. This should not concern someone
//! only looking to use the public interface.
//!
//! ## EBB ordering
//!
//! The relative order of EBBs is used to maintain a sorted list of live-in intervals and to
//! coalesce adjacent live-in intervals when the prior interval covers the whole EBB. This doesn't
//! depend on any property of the program order, so alternative orderings are possible:
//!
//! 1. The EBB layout order. This is what we currently use.
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
//! def interval.
//! 3. A numerical order by EBB number. Performant because it doesn't need to indirect through the
//! `ProgramOrder` for comparisons.
//!
//! These orderings will cause small differences in coalescing opportunities, but all of them would
//! do a decent job of compressing a long live range. The numerical order might be preferable
//! because:
//!
//! - It has better performance because EBB numbers can be compared directly without any table
//! lookups.
//! - If EBB numbers are not reused, it is safe to allocate new EBBs without getting spurious
//! live-in intervals from any coalesced representations that happen to cross a new EBB.
//!
//! For comparing instructions, the layout order is always what we want.
//!
//! ## Alternative representation
//!
//! Since a local live-in interval always begins at its EBB header, it is uniquely described by its
//! end point instruction alone. We can use the layout to look up the EBB containing the end point.
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
//!
//! Coalescing is an important compression technique because some live ranges can span thousands of
//! EBBs. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
//! an `[Ebb, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
//! `Ebb` entry represents a single live-in interval.
//!
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
//! It is more complicated to work with, though, so it is probably not worth it. The performance
//! benefits of switching to a numerical EBB order only appear if the binary search is doing
//! EBB-EBB comparisons.
//!
//! ## B-tree representation
//!
//! A `BTreeMap<Ebb, Inst>` could also be used for the live-in intervals. It looks like the
//! standard library B-tree doesn't provide the necessary interface for an efficient implementation
//! of coalescing, so we would need to roll our own.
//!
use crate::bforest;
use crate::entity::SparseMapValue;
use crate::ir::{Ebb, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
use crate::regalloc::affinity::Affinity;
use core::cmp::Ordering;
use core::marker::PhantomData;
/// Global live range of a single SSA value.
///
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
/// SSA value is the disjoint union of a set of intervals, each local to a single EBB, and with at
/// most one interval per EBB. We further distinguish between:
///
/// 1. The *def interval* is the local interval in the EBB where the value is defined, and
/// 2. The *live-in intervals* are the local intervals in the remaining EBBs.
///
/// A live-in interval always begins at the EBB header, while the def interval can begin at the
/// defining instruction, or at the EBB header for an EBB argument value.
///
/// All values have a def interval, but a large proportion of values don't have any live-in
/// intervals. These are called *local live ranges*.
///
/// # Program order requirements
///
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
/// ordering instructions inside an EBB *and* for ordering EBBs. The methods that depend on the
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
/// ensure that the provided ordering is consistent between calls.
///
/// In particular, changing the order of EBBs or inserting new EBBs will invalidate live ranges.
///
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
/// instructions using or defining their value, `LiveRange` structs can contain references to
/// branch and jump instructions.
pub type LiveRange = GenLiveRange<Layout>;
/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenLiveRange<PO: ProgramOrder> {
/// The value described by this live range.
/// This member can't be modified in case the live range is stored in a `SparseMap`.
value: Value,
/// The preferred register allocation for this value.
pub affinity: Affinity,
/// The instruction or EBB header where this value is defined.
def_begin: ProgramPoint,
/// The end point of the def interval. This must always belong to the same EBB as `def_begin`.
///
/// We always have `def_begin <= def_end` with equality implying a dead def live range with no
/// uses.
def_end: ProgramPoint,
/// Additional live-in intervals sorted in program order.
///
/// This map is empty for most values which are only used in one EBB.
///
/// A map entry `ebb -> inst` means that the live range is live-in to `ebb`, continuing up to
/// `inst` which may belong to a later EBB in the program order.
///
/// The entries are non-overlapping, and none of them overlap the EBB where the value is
/// defined.
liveins: bforest::Map<Ebb, Inst>,
po: PhantomData<*const PO>,
}
/// Context information needed to query a `LiveRange`.
pub struct LiveRangeContext<'a, PO: 'a + ProgramOrder> {
/// Ordering of EBBs.
pub order: &'a PO,
/// Memory pool.
pub forest: &'a bforest::MapForest<Ebb, Inst>,
}
impl<'a, PO: ProgramOrder> LiveRangeContext<'a, PO> {
/// Make a new context.
pub fn new(order: &'a PO, forest: &'a bforest::MapForest<Ebb, Inst>) -> Self {
Self { order, forest }
}
}
impl<'a, PO: ProgramOrder> Clone for LiveRangeContext<'a, PO> {
fn clone(&self) -> Self {
LiveRangeContext {
order: self.order,
forest: self.forest,
}
}
}
impl<'a, PO: ProgramOrder> Copy for LiveRangeContext<'a, PO> {}
/// Forest of B-trees used for storing live ranges.
pub type LiveRangeForest = bforest::MapForest<Ebb, Inst>;
struct Cmp<'a, PO: ProgramOrder + 'a>(&'a PO);
impl<'a, PO: ProgramOrder> bforest::Comparator<Ebb> for Cmp<'a, PO> {
fn cmp(&self, a: Ebb, b: Ebb) -> Ordering {
self.0.cmp(a, b)
}
}
impl<PO: ProgramOrder> GenLiveRange<PO> {
/// Create a new live range for `value` defined at `def`.
///
/// The live range will be created as dead, but it can be extended with `extend_in_ebb()`.
pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
Self {
value,
affinity,
def_begin: def,
def_end: def,
liveins: bforest::Map::new(),
po: PhantomData,
}
}
/// Extend the local interval for `ebb` so it reaches `to` which must belong to `ebb`.
/// Create a live-in interval if necessary.
///
/// If the live range already has a local interval in `ebb`, extend its end point so it
/// includes `to`, and return false.
///
/// If the live range did not previously have a local interval in `ebb`, add one so the value
/// is live-in to `ebb`, extending to `to`. Return true.
///
/// The return value can be used to detect if we just learned that the value is live-in to
/// `ebb`. This can trigger recursive extensions in `ebb`'s CFG predecessor blocks.
pub fn extend_in_ebb(
&mut self,
ebb: Ebb,
to: Inst,
order: &PO,
forest: &mut bforest::MapForest<Ebb, Inst>,
) -> bool {
// First check if we're extending the def interval.
//
// We're assuming here that `to` never precedes `def_begin` in the same EBB, but we can't
// check it without a method for getting `to`'s EBB.
if order.cmp(ebb, self.def_end) != Ordering::Greater
&& order.cmp(to, self.def_begin) != Ordering::Less
{
let to_pp = to.into();
debug_assert_ne!(
to_pp, self.def_begin,
"Can't use value in the defining instruction."
);
if order.cmp(to, self.def_end) == Ordering::Greater {
self.def_end = to_pp;
}
return false;
}
// Now check if we're extending any of the existing live-in intervals.
let cmp = Cmp(order);
let mut c = self.liveins.cursor(forest, &cmp);
let first_time_livein;
if let Some(end) = c.goto(ebb) {
// There's an interval beginning at `ebb`. See if it extends.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else if let Some((_, end)) = c.prev() {
// There's no interval beginning at `ebb`, but we could still be live-in at `ebb` with
// a coalesced interval that begins before and ends after.
if order.cmp(end, ebb) == Ordering::Greater {
// Yep, the previous interval overlaps `ebb`.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else {
first_time_livein = true;
// The current interval does not overlap `ebb`, but it may still be possible to
// coalesce with it.
if order.is_ebb_gap(end, ebb) {
*c.value_mut().unwrap() = to;
} else {
c.insert(ebb, to);
}
}
} else {
// There is no existing interval before `ebb`.
first_time_livein = true;
c.insert(ebb, to);
}
        // Now `c` is left pointing at an interval that ends at `to`.
debug_assert_eq!(c.value(), Some(to));
// See if it can be coalesced with the following interval.
if let Some((next_ebb, next_end)) = c.next() {
if order.is_ebb_gap(to, next_ebb) {
// Remove this interval and extend the previous end point to `next_end`.
c.remove();
c.prev();
*c.value_mut().unwrap() = next_end;
}
}
first_time_livein
}
/// Is this the live range of a dead value?
///
/// A dead value has no uses, and its live range ends at the same program point where it is
/// defined.
pub fn is_dead(&self) -> bool {
self.def_begin == self.def_end
}
/// Is this a local live range?
///
/// A local live range is only used in the same EBB where it was defined. It is allowed to span
/// multiple basic blocks within that EBB.
pub fn is_local(&self) -> bool {
self.liveins.is_empty()
}
/// Get the program point where this live range is defined.
///
/// This will be an EBB header when the value is an EBB argument, otherwise it is the defining
/// instruction.
pub fn def(&self) -> ProgramPoint {
self.def_begin
}
/// Move the definition of this value to a new program point.
///
/// It is only valid to move the definition within the same EBB, and it can't be moved beyond
/// `def_local_end()`.
pub fn move_def_locally(&mut self, def: ProgramPoint) {
self.def_begin = def;
}
/// Get the local end-point of this live range in the EBB where it is defined.
///
/// This can be the EBB header itself in the case of a dead EBB argument.
/// Otherwise, it will be the last local use or branch/jump that can reach a use.
pub fn def_local_end(&self) -> ProgramPoint {
self.def_end
}
/// Get the local end-point of this live range in an EBB where it is live-in.
///
/// If this live range is not live-in to `ebb`, return `None`. Otherwise, return the end-point
/// of this live range's local interval in `ebb`.
///
/// If the live range is live through all of `ebb`, the terminator of `ebb` is a correct
/// answer, but it is also possible that an even later program point is returned. So don't
/// depend on the returned `Inst` to belong to `ebb`.
pub fn livein_local_end(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> Option<Inst> {
let cmp = Cmp(ctx.order);
self.liveins
.get_or_less(ebb, ctx.forest, &cmp)
.and_then(|(_, inst)| {
// We have an entry that ends at `inst`.
if ctx.order.cmp(inst, ebb) == Ordering::Greater {
Some(inst)
} else {
None
}
})
}
/// Is this value live-in to `ebb`?
///
/// An EBB argument is not considered to be live in.
pub fn is_livein(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.livein_local_end(ebb, ctx).is_some()
}
/// Get all the live-in intervals.
///
/// Note that the intervals are stored in a compressed form so each entry may span multiple
/// EBBs where the value is live in.
pub fn liveins<'a>(&'a self, ctx: LiveRangeContext<'a, PO>) -> bforest::MapIter<'a, Ebb, Inst> {
self.liveins.iter(ctx.forest)
}
/// Check if this live range overlaps a definition in `ebb`.
pub fn overlaps_def(
&self,
def: ExpandedProgramPoint,
ebb: Ebb,
ctx: LiveRangeContext<PO>,
) -> bool {
// Two defs at the same program point always overlap, even if one is dead.
if def == self.def_begin.into() {
return true;
}
// Check for an overlap with the local range.
if ctx.order.cmp(def, self.def_begin) != Ordering::Less
&& ctx.order.cmp(def, self.def_end) == Ordering::Less
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(def, inst) == Ordering::Less,
None => false,
}
}
/// Check if this live range reaches a use at `user` in `ebb`.
pub fn reaches_use(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
// Check for an overlap with the local range.
if ctx.order.cmp(user, self.def_begin) == Ordering::Greater
&& ctx.order.cmp(user, self.def_end) != Ordering::Greater
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(user, inst) != Ordering::Greater,
None => false,
}
}
/// Check if this live range is killed at `user` in `ebb`.
pub fn killed_at(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.def_local_end() == user.into() || self.livein_local_end(ebb, ctx) == Some(user)
}
}
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenLiveRange<PO> {
fn key(&self) -> Value {
self.value
}
}
#[cfg(test)]
mod tests {
use super::{GenLiveRange, LiveRangeContext};
use crate::bforest;
use crate::entity::EntityRef;
use crate::ir::{Ebb, Inst, Value};
use crate::ir::{ExpandedProgramPoint, ProgramOrder};
use core::cmp::Ordering;
use std::vec::Vec;
// Dummy program order which simply compares indexes.
// It is assumed that EBBs have indexes that are multiples of 10, and instructions have indexes
// in between. `is_ebb_gap` assumes that terminator instructions have indexes of the form
// ebb * 10 + 1. This is used in the coalesce test.
struct ProgOrder {}
impl ProgramOrder for ProgOrder {
fn cmp<A, B>(&self, a: A, b: B) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
fn idx(pp: ExpandedProgramPoint) -> usize {
match pp {
ExpandedProgramPoint::Inst(i) => i.index(),
ExpandedProgramPoint::Ebb(e) => e.index(),
}
}
let ia = idx(a.into());
let ib = idx(b.into());
ia.cmp(&ib)
}
fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool {
inst.index() % 10 == 1 && ebb.index() / 10 == inst.index() / 10 + 1
}
}
impl ProgOrder {
// Get the EBB corresponding to `inst`.
fn inst_ebb(&self, inst: Inst) -> Ebb {
let i = inst.index();
Ebb::new(i - i % 10)
}
// Get the EBB of a program point.
fn pp_ebb<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Ebb {
match pp.into() {
ExpandedProgramPoint::Inst(i) => self.inst_ebb(i),
ExpandedProgramPoint::Ebb(e) => e,
}
}
// Validate the live range invariants.
fn validate(&self, lr: &GenLiveRange<ProgOrder>, forest: &bforest::MapForest<Ebb, Inst>) {
// The def interval must cover a single EBB.
let def_ebb = self.pp_ebb(lr.def_begin);
assert_eq!(def_ebb, self.pp_ebb(lr.def_end));
// Check that the def interval isn't backwards.
match self.cmp(lr.def_begin, lr.def_end) {
Ordering::Equal => assert!(lr.liveins.is_empty()),
Ordering::Greater => {
panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
}
Ordering::Less => {}
}
// Check the live-in intervals.
let mut prev_end = None;
for (begin, end) in lr.liveins.iter(forest) {
assert_eq!(self.cmp(begin, end), Ordering::Less);
if let Some(e) = prev_end {
assert_eq!(self.cmp(e, begin), Ordering::Less);
}
assert!(
self.cmp(lr.def_end, begin) == Ordering::Less
|| self.cmp(lr.def_begin, end) == Ordering::Greater,
"Interval can't overlap the def EBB"
);
// Save for next round.
prev_end = Some(end);
}
}
}
// Singleton `ProgramOrder` for tests below.
const PO: &'static ProgOrder = &ProgOrder {};
#[test]
fn dead_def_range() {
let v0 = Value::new(0);
let e0 = Ebb::new(0);
let i1 = Inst::new(1);
let i2 = Inst::new(2);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, i1.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i1.into());
assert_eq!(lr.def_local_end(), i1.into());
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
// A dead live range overlaps its own def program point.
assert!(lr.overlaps_def(i1.into(), e0, ctx));
assert!(!lr.overlaps_def(i2.into(), e0, ctx));
assert!(!lr.overlaps_def(e0.into(), e0, ctx));
}
#[test]
fn dead_arg_range() {
let v0 = Value::new(0);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, e2.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e2.into());
assert_eq!(lr.def_local_end(), e2.into());
// The def interval of an EBB argument does not count as live-in.
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
}
#[test]
fn local_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn local_arg() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, e10.into(), Default::default());
let forest = &mut bforest::MapForest::new();
// Extending a dead EBB argument in its own block should not indicate that a live-in
// interval was created.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i11, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending further.
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn global_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let i22 = Inst::new(22);
let i23 = Inst::new(23);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
// Adding a live-in interval.
assert_eq!(lr.extend_in_ebb(e20, i22, PO, forest), true);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
        // Extending to an already covered inst should not change the live-in interval.
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), false);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
// Extending the existing live-in.
assert_eq!(lr.extend_in_ebb(e20, i23, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i23)
);
}
#[test]
fn coalesce() {
let v0 = Value::new(0);
let i11 = Inst::new(11);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let e30 = Ebb::new(30);
let i31 = Inst::new(31);
let e40 = Ebb::new(40);
let i41 = Inst::new(41);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i31)]
);
// Coalesce to previous
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i41)]
);
// Coalesce to next
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e40, i41)]
);
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i21), (e40, i41)]
);
// Coalesce to previous and next
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
}
// TODO: Add more tests that exercise the binary search algorithm.
}

View File

@@ -0,0 +1,23 @@
//! Register allocation.
//!
//! This module contains data structures and algorithms used for register allocation.
pub mod coloring;
pub mod live_value_tracker;
pub mod liveness;
pub mod liverange;
pub mod register_set;
pub mod virtregs;
mod affinity;
mod coalescing;
mod context;
mod diversion;
mod pressure;
mod reload;
mod solver;
mod spilling;
pub use self::context::Context;
pub use self::diversion::RegDiversions;
pub use self::register_set::RegisterSet;

View File

@@ -0,0 +1,384 @@
//! Register pressure tracking.
//!
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
//! sufficiently". This module defines the data structures needed to measure register pressure
//! accurately enough to guarantee that the coloring phase will not run out of registers.
//!
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
//! any given program point. This simplistic method has two problems:
//!
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
//! register banks, so we need to at least count the number of live registers in each register
//! bank separately.
//!
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
//! This makes it difficult to accurately measure register pressure.
//!
//! This module deals with the problems via *register banks* and *top-level register classes*.
//! Register classes in different register banks are completely independent, so we can count
//! registers in one bank without worrying about the other bank at all.
//!
//! All register classes have a unique top-level register class, and we will count registers for
//! each top-level register class individually. However, a register bank can have multiple
//! top-level register classes that interfere with each other, so all top-level counts need to
//! be considered when determining how many more registers can be allocated.
//!
//! Currently, the only register bank with multiple top-level register classes is the `arm32`
//! floating-point register bank, which has `S`, `D`, and `Q` top-level classes.
//!
//! # Base and transient counts
//!
//! We maintain two separate register counts per top-level register class: base counts and
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
//! transient counts are adjusted with `take_transient` and `free_transient`.
// Remove once we're using the pressure tracker.
#![allow(dead_code)]
use crate::isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
use crate::regalloc::RegisterSet;
use core::cmp::min;
use core::fmt;
use core::iter::ExactSizeIterator;
/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
// Number of registers currently used from this register class.
base_count: u32,
transient_count: u32,
// Max number of registers that can be allocated.
limit: u32,
// Register units per register.
width: u8,
// The first aliasing top-level RC.
first_toprc: u8,
// The number of aliasing top-level RCs.
num_toprcs: u8,
}
impl TopRC {
fn total_count(&self) -> u32 {
self.base_count + self.transient_count
}
}
pub struct Pressure {
// Bit mask of top-level register classes that are aliased by other top-level register classes.
// Unaliased register classes can use a simpler interference algorithm.
aliased: RegClassMask,
// Current register counts per top-level register class.
toprc: [TopRC; MAX_TRACKED_TOPRCS],
}
impl Pressure {
/// Create a new register pressure tracker.
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
let mut p = Self {
aliased: 0,
toprc: Default::default(),
};
// Get the layout of aliasing top-level register classes from the register banks.
for bank in reginfo.banks {
let first = bank.first_toprc;
let num = bank.num_toprcs;
if bank.pressure_tracking {
for rc in &mut p.toprc[first..first + num] {
rc.first_toprc = first as u8;
rc.num_toprcs = num as u8;
}
// Flag the top-level register classes with aliases.
if num > 1 {
p.aliased |= ((1 << num) - 1) << first;
}
} else {
// This bank has no pressure tracking, so its top-level register classes may exceed
// `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOPRCS)] {
// These aren't used if we don't set the `aliased` bit.
rc.first_toprc = !0;
rc.limit = !0;
}
}
}
// Compute per-class limits from `usable`.
for (toprc, rc) in p
.toprc
.iter_mut()
.take_while(|t| t.num_toprcs > 0)
.zip(reginfo.classes)
{
toprc.limit = usable.iter(rc).len() as u32;
toprc.width = rc.width;
}
p
}
/// Check for an available register in the register class `rc`.
///
/// If it is possible to allocate one more register from `rc`'s top-level register class,
/// returns 0.
///
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
/// pressure should be eased in one of the returned top-level register classes before calling
    /// `check_avail()` again.
fn check_avail(&self, rc: RegClass) -> RegClassMask {
let entry = match self.toprc.get(rc.toprc as usize) {
None => return 0, // Not a pressure tracked bank.
Some(e) => e,
};
let mask = 1 << rc.toprc;
if (self.aliased & mask) == 0 {
// This is a simple unaliased top-level register class.
if entry.total_count() < entry.limit {
0
} else {
mask
}
} else {
// This is the more complicated case. The top-level register class has aliases.
self.check_avail_aliased(entry)
}
}
/// Check for an available register in a top-level register class that may have aliases.
///
/// This is the out-of-line slow path for `check_avail()`.
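    ///
    /// As an illustrative sketch (numbers assumed, not taken from a real ISA): for a top-level
    /// class with `width = 2` and `limit = 16`, `ulimit` is 32 register units, and each taken
    /// register from an aliasing single-unit class counts as up to 2 blocked units, capped at
    /// the total number of units in that class.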
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
let first = usize::from(entry.first_toprc);
let num = usize::from(entry.num_toprcs);
let width = u32::from(entry.width);
let ulimit = entry.limit * width;
// Count up the number of available register units.
let mut units = 0;
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
let rcw = u32::from(rc.width);
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
// one of ours. This is assuming that none of the smaller registers are straddling the
// bigger ones.
//
// If `rc.width` is larger than `width`, we are also assuming that the registers are
// aligned and `rc.width` is a multiple of `width`.
let u = if rcw < width {
// We can't take more than the total number of register units in the class.
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
min(rc.total_count() * width, rc.limit * rcw)
} else {
rc.total_count() * rcw
};
// If this top-level RC on its own is responsible for exceeding our limit, return it
// early to guarantee that registers here are spilled before spilling other registers
// unnecessarily.
if u >= ulimit {
return 1 << rci;
}
units += u;
}
// We've counted up the worst-case number of register units claimed by all aliasing
// classes. Compare to the unit limit in this class.
if units < ulimit {
0
} else {
// Registers need to be spilled from any one of the aliasing classes.
((1 << num) - 1) << first
}
}
/// Take a register from `rc`.
///
/// This does not check if there are enough registers available.
pub fn take(&mut self, rc: RegClass) {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.base_count += 1;
}
}
/// Free a register in `rc`.
pub fn free(&mut self, rc: RegClass) {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.base_count -= 1;
}
}
/// Reset all counts to 0, both base and transient.
pub fn reset(&mut self) {
for e in &mut self.toprc {
e.base_count = 0;
e.transient_count = 0;
}
}
/// Try to increment a transient counter.
///
/// This will fail if there are not enough registers available.
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
let mask = self.check_avail(rc);
if mask == 0 {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.transient_count += 1;
}
Ok(())
} else {
Err(mask)
}
}
/// Reset all transient counts to 0.
pub fn reset_transient(&mut self) {
for e in &mut self.toprc {
e.transient_count = 0;
}
}
/// Preserve the transient counts by transferring them to the base counts.
pub fn preserve_transient(&mut self) {
for e in &mut self.toprc {
e.base_count += e.transient_count;
e.transient_count = 0;
}
}
}
impl fmt::Display for Pressure {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Pressure[")?;
for rc in &self.toprc {
if rc.limit > 0 && rc.limit < !0 {
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
}
}
write!(f, " ]")
}
}
#[cfg(test)]
#[cfg(build_arm32)]
mod tests {
use super::Pressure;
use crate::isa::{RegClass, TargetIsa};
use crate::regalloc::RegisterSet;
use core::borrow::Borrow;
use core::str::FromStr;
use std::boxed::Box;
use target_lexicon::triple;
// Make an arm32 `TargetIsa`, if possible.
fn arm32() -> Option<Box<TargetIsa>> {
use crate::isa;
use crate::settings;
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
isa::lookup(triple!("arm"))
.ok()
.map(|b| b.finish(shared_flags))
}
// Get a register class by name.
fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
isa.register_info()
.classes
.iter()
.find(|rc| rc.name == name)
.expect("Can't find named register class.")
}
#[test]
fn basic_counting() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let gpr = rc_by_name(isa, "GPR");
let s = rc_by_name(isa, "S");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
let mut count = 0;
while pressure.check_avail(gpr) == 0 {
pressure.take(gpr);
count += 1;
}
assert_eq!(count, 16);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.free(gpr);
assert_eq!(pressure.check_avail(gpr), 0);
pressure.take(gpr);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.reset();
assert_eq!(pressure.check_avail(gpr), 0);
assert_eq!(pressure.check_avail(s), 0);
}
#[test]
fn arm_float_bank() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let s = rc_by_name(isa, "S");
let d = rc_by_name(isa, "D");
let q = rc_by_name(isa, "Q");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Allocating a single S-register should not affect availability.
pressure.take(s);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(d);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Take a total of 16 S-regs.
for _ in 1..16 {
pressure.take(s);
}
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
for _ in 0..6 {
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
}
// We've taken 16 S, 1 D, and 7 Qs.
assert!(pressure.check_avail(s) != 0);
assert_eq!(pressure.check_avail(d), 0);
assert!(pressure.check_avail(q) != 0);
}
}

View File

@@ -0,0 +1,324 @@
//! Set of allocatable registers as a bit vector of register units.
//!
//! While allocating registers, we need to keep track of which registers are available and which
//! registers are in use. Since registers can alias in different ways, we track this via the
//! "register unit" abstraction. Every register contains one or more register units. Registers that
//! share a register unit can't be in use at the same time.
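//!
//! A minimal usage sketch (`rc` and `unit` are assumed to come from the target's `RegInfo`, so
//! this is illustrative rather than a runnable doctest):
//!
//! ```ignore
//! let mut regs = RegisterSet::new();
//! if regs.is_avail(rc, unit) {
//!     regs.take(rc, unit); // Claims every register unit covered by the register.
//!     // ... use the register ...
//!     regs.free(rc, unit); // Returns all of its units to the set.
//! }
//! ```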
use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
use core::char;
use core::fmt;
use core::iter::ExactSizeIterator;
use core::mem::size_of_val;
/// Set of registers available for allocation.
#[derive(Clone)]
pub struct RegisterSet {
avail: RegUnitMask,
}
// Given a register class and a register unit in the class, compute a word index and a bit mask of
// register units representing that register.
//
// Note that a register is not allowed to straddle words.
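//
// As a worked example (values chosen for illustration): for a class with `rc.width == 2` and
// `reg == 33`, `width_bits` is `0b11`, `word_index` is `1`, and `reg_bits` is `0b11 << 1 == 0b110`.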
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
// Bit mask representing the register. It is `rc.width` consecutive units.
let width_bits = (1 << rc.width) - 1;
// Index into avail[] of the word containing `reg`.
let word_index = (reg / 32) as usize;
// The actual bits in the word that cover `reg`.
let reg_bits = width_bits << (reg % 32);
(word_index, reg_bits)
}
impl RegisterSet {
/// Create a new register set with all registers available.
///
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
/// allocatable registers where reserved registers have been filtered out.
pub fn new() -> Self {
Self { avail: [!0; 3] }
}
/// Create a new register set with no registers available.
pub fn empty() -> Self {
Self { avail: [0; 3] }
}
/// Returns `true` if the specified register is available.
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
let (idx, bits) = bitmask(rc, reg);
(self.avail[idx] & bits) == bits
}
/// Allocate `reg` from `rc` so it is no longer available.
///
/// It is an error to take a register that doesn't have all of its register units available.
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == bits,
"{}:{} not available in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] &= !bits;
}
/// Return `reg` and all of its register units to the set of available registers.
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == 0,
"{}:{} is already free in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] |= bits;
}
/// Return an iterator over all available registers belonging to the register class `rc`.
///
/// This doesn't allocate anything from the set; use `take()` for that.
pub fn iter(&self, rc: RegClass) -> RegSetIter {
// Start by copying the RC mask. It is a single set bit for each register in the class.
let mut rsi = RegSetIter { regs: rc.mask };
// Mask out the unavailable units.
for idx in 0..self.avail.len() {
// If a single unit in a register is unavailable, the whole register can't be used.
// If a register straddles a word boundary, it will be marked as unavailable.
// There's an assertion in `cdsl/registers.py` to check for that.
for i in 0..rc.width {
rsi.regs[idx] &= self.avail[idx] >> i;
}
}
rsi
}
/// Check if any register units allocated out of this set interferes with units allocated out
/// of `other`.
///
/// This assumes that unused bits are 1.
pub fn interferes_with(&self, other: &Self) -> bool {
self.avail
.iter()
.zip(&other.avail)
.any(|(&x, &y)| (x | y) != !0)
}
/// Intersect this set of registers with `other`. This has the effect of removing any register
/// units from this set that are not in `other`.
pub fn intersect(&mut self, other: &Self) {
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
*x &= y;
}
}
/// Return an object that can display this register set, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
DisplayRegisterSet(self.clone(), regs.into())
}
}
/// Iterator over available registers in a register class.
pub struct RegSetIter {
regs: RegUnitMask,
}
impl Iterator for RegSetIter {
type Item = RegUnit;
fn next(&mut self) -> Option<RegUnit> {
let mut unit_offset = 0;
// Find the first set bit in `self.regs`.
for word in &mut self.regs {
if *word != 0 {
// Compute the register unit number from the lowest set bit in the word.
let unit = unit_offset + word.trailing_zeros() as RegUnit;
// Clear that lowest bit so we won't find it again.
*word &= *word - 1;
return Some(unit);
}
            // How many register units were there in the word? This is a constant 32 for `u32` etc.
unit_offset += 8 * size_of_val(word) as RegUnit;
}
// All of `self.regs` is 0.
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
(bits, Some(bits))
}
}
impl ExactSizeIterator for RegSetIter {}
/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[")?;
match self.1 {
None => {
for w in &self.0.avail {
write!(f, " #{:08x}", w)?;
}
}
Some(reginfo) => {
let toprcs = reginfo
.banks
.iter()
.map(|b| b.first_toprc + b.num_toprcs)
.max()
.expect("No register banks");
for rc in &reginfo.classes[0..toprcs] {
if rc.width == 1 {
let bank = &reginfo.banks[rc.bank as usize];
write!(f, " {}: ", rc)?;
for offset in 0..bank.units {
let reg = bank.first_unit + offset;
if !rc.contains(reg) {
continue;
}
if !self.0.is_avail(rc, reg) {
write!(f, "-")?;
continue;
}
// Display individual registers as either the second letter of their
// name or the last digit of their number.
// This works for x86 (rax, rbx, ...) and for numbered regs.
write!(
f,
"{}",
bank.names
.get(offset as usize)
.and_then(|name| name.chars().nth(1))
.unwrap_or_else(|| char::from_digit(
u32::from(offset % 10),
10
)
.unwrap())
)?;
}
}
}
}
}
write!(f, " ]")
}
}
impl fmt::Display for RegisterSet {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::isa::registers::{RegClass, RegClassData};
use std::vec::Vec;
// Register classes for testing.
const GPR: RegClass = &RegClassData {
name: "GPR",
index: 0,
width: 1,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0xf0000000, 0x0000000f, 0],
info: &INFO,
};
const DPR: RegClass = &RegClassData {
name: "DPR",
index: 0,
width: 2,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0x50000000, 0x0000000a, 0],
info: &INFO,
};
const INFO: RegInfo = RegInfo {
banks: &[],
classes: &[],
};
#[test]
fn put_and_take() {
let mut regs = RegisterSet::new();
        // `GPR` has 8 registers, units 28 through 35.
assert_eq!(regs.iter(GPR).len(), 8);
assert_eq!(regs.iter(GPR).count(), 8);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
assert!(regs.is_avail(GPR, 29));
regs.take(&GPR, 29);
assert!(!regs.is_avail(GPR, 29));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
assert!(regs.is_avail(GPR, 30));
regs.take(&GPR, 30);
assert!(!regs.is_avail(GPR, 30));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
assert!(regs.is_avail(GPR, 32));
regs.take(&GPR, 32);
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 5);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
regs.free(&GPR, 30);
assert!(regs.is_avail(GPR, 30));
assert!(!regs.is_avail(GPR, 29));
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
regs.free(&GPR, 32);
assert!(regs.is_avail(GPR, 31));
assert!(!regs.is_avail(GPR, 29));
assert!(regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
}
#[test]
fn interference() {
let mut regs1 = RegisterSet::new();
let mut regs2 = RegisterSet::new();
assert!(!regs1.interferes_with(&regs2));
regs1.take(&GPR, 32);
assert!(!regs1.interferes_with(&regs2));
regs2.take(&GPR, 31);
assert!(!regs1.interferes_with(&regs2));
regs1.intersect(&regs2);
assert!(regs1.interferes_with(&regs2));
}
}

View File

@@ -0,0 +1,438 @@
//! Reload pass
//!
//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
//! get a value with register affinity, and operands expecting a stack slot will get a value with
//! stack affinity.
//!
//! The secondary responsibility of the reload pass is to reuse values in registers as much as
//! possible to minimize the number of `fill` instructions needed. This must not cause the register
//! pressure limits to be exceeded.
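//!
//! For example (a sketch with illustrative value numbers), a use of a spilled value `v1` by an
//! instruction whose operand constraint requires a register is rewritten from:
//!
//! ```text
//! v2 = iadd v1, v3
//! ```
//!
//! into:
//!
//! ```text
//! v4 = fill v1
//! v2 = iadd v4, v3
//! ```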
use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::{SparseMap, SparseMapValue};
use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value};
use crate::isa::RegClass;
use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use crate::regalloc::liveness::Liveness;
use crate::timing;
use crate::topo_order::TopoOrder;
use log::debug;
use std::vec::Vec;
/// Reusable data structures for the reload pass.
pub struct Reload {
candidates: Vec<ReloadCandidate>,
reloads: SparseMap<Value, ReloadedValue>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
cur: EncCursor<'a>,
// Cached ISA information.
// We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
topo: &'a mut TopoOrder,
candidates: &'a mut Vec<ReloadCandidate>,
reloads: &'a mut SparseMap<Value, ReloadedValue>,
}
impl Reload {
/// Create a new blank reload pass.
pub fn new() -> Self {
Self {
candidates: Vec::new(),
reloads: SparseMap::new(),
}
}
/// Clear all data structures in this reload pass.
pub fn clear(&mut self) {
self.candidates.clear();
self.reloads.clear();
}
/// Run the reload algorithm over `func`.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_reload();
debug!("Reload for:\n{}", func.display(isa));
let mut ctx = Context {
cur: EncCursor::new(func, isa),
encinfo: isa.encoding_info(),
domtree,
liveness,
topo,
candidates: &mut self.candidates,
reloads: &mut self.reloads,
};
ctx.run(tracker)
}
}
/// A reload candidate.
///
/// This represents a stack value that is used by the current instruction where a register is
/// needed.
struct ReloadCandidate {
argidx: usize,
value: Value,
regclass: RegClass,
}
/// A Reloaded value.
///
/// This represents a value that has been reloaded into a register value from the stack.
struct ReloadedValue {
stack: Value,
reg: Value,
}
impl SparseMapValue<Value> for ReloadedValue {
fn key(&self) -> Value {
self.stack
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.ebbs());
while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_ebb(ebb, tracker);
}
}
fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
debug!("Reloading {}:", ebb);
self.visit_ebb_header(ebb, tracker);
tracker.drop_dead_params();
// visit_ebb_header() places us at the first interesting instruction in the EBB.
while let Some(inst) = self.cur.current_inst() {
if !self.cur.func.dfg[inst].opcode().is_ghost() {
// This instruction either has an encoding or has ABI constraints, so visit it to
// insert spills and fills as needed.
let encoding = self.cur.func.encodings[inst];
self.visit_inst(ebb, inst, encoding, tracker);
tracker.drop_dead(inst);
} else {
// This is a ghost instruction with no encoding and no extra constraints, so we can
// just skip over it.
self.cur.next_inst();
}
}
}
    /// Process the EBB parameters and move the cursor to the first instruction in the EBB that
    /// needs processing.
fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
let (liveins, args) = tracker.ebb_top(
ebb,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
if self.cur.func.layout.entry_block() == Some(ebb) {
debug_assert_eq!(liveins.len(), 0);
self.visit_entry_params(ebb, args);
} else {
self.visit_ebb_params(ebb, args);
}
}
/// Visit the parameters on the entry block.
/// These values have ABI constraints from the function signature.
fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) {
debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
self.cur.goto_first_inst(ebb);
for (arg_idx, arg) in args.iter().enumerate() {
let abi = self.cur.func.signature.params[arg_idx];
match abi.location {
ArgumentLoc::Reg(_) => {
if arg.affinity.is_stack() {
// An incoming register parameter was spilled. Replace the parameter value
// with a temporary register value that is immediately spilled.
let reg = self
.cur
.func
.dfg
.replace_ebb_param(arg.value, abi.value_type);
let affinity = Affinity::abi(&abi, self.cur.isa);
self.liveness.create_dead(reg, ebb, affinity);
self.insert_spill(ebb, arg.value, reg);
}
}
ArgumentLoc::Stack(_) => {
debug_assert!(arg.affinity.is_stack());
}
ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
}
}
}
fn visit_ebb_params(&mut self, ebb: Ebb, _args: &[LiveValue]) {
self.cur.goto_first_inst(ebb);
}
/// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
/// that needs processing.
fn visit_inst(
&mut self,
ebb: Ebb,
inst: Inst,
encoding: Encoding,
tracker: &mut LiveValueTracker,
) {
self.cur.use_srcloc(inst);
// Get the operand constraints for `inst` that we are trying to satisfy.
let constraints = self.encinfo.operand_constraints(encoding);
// Identify reload candidates.
debug_assert!(self.candidates.is_empty());
self.find_candidates(inst, constraints);
if let InstructionData::Unary {
opcode: Opcode::Copy,
..
} = self.cur.func.dfg[inst]
{
self.reload_copy_candidates(inst);
} else {
self.reload_inst_candidates(ebb, inst);
}
// TODO: Reuse reloads for future instructions.
self.reloads.clear();
let (_throughs, _kills, defs) =
tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Advance to the next instruction so we can insert any spills after the instruction.
self.cur.next_inst();
// Rewrite register defs that need to be spilled.
//
// Change:
//
// v2 = inst ...
//
// Into:
//
// v7 = inst ...
// v2 = spill v7
//
// That way, we don't need to rewrite all future uses of v2.
if let Some(constraints) = constraints {
for (lv, op) in defs.iter().zip(constraints.outs) {
if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
if let InstructionData::Unary {
opcode: Opcode::Copy,
arg,
} = self.cur.func.dfg[inst]
{
self.cur.func.dfg.replace(inst).spill(arg);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok);
} else {
let value_type = self.cur.func.dfg.value_type(lv.value);
let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
self.liveness.create_dead(reg, inst, Affinity::new(op));
self.insert_spill(ebb, lv.value, reg);
}
}
}
}
// Same thing for spilled call return values.
let retvals = &defs[self.cur.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_results()..];
if !retvals.is_empty() {
let sig = self
.cur
.func
.dfg
.call_signature(inst)
.expect("Extra results on non-call instruction");
for (i, lv) in retvals.iter().enumerate() {
let abi = self.cur.func.dfg.signatures[sig].returns[i];
debug_assert!(
abi.location.is_reg(),
"expected reg; got {:?}",
abi.location
);
if lv.affinity.is_stack() {
let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
self.liveness
.create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa));
self.insert_spill(ebb, lv.value, reg);
}
}
}
}
// Reload the current candidates for the given `inst`.
fn reload_inst_candidates(&mut self, ebb: Ebb, inst: Inst) {
// Insert fill instructions before `inst` and replace `cand.value` with the filled value.
for cand in self.candidates.iter_mut() {
if let Some(reload) = self.reloads.get(cand.value) {
cand.value = reload.reg;
continue;
}
let reg = self.cur.ins().fill(cand.value);
let fill = self.cur.built_inst();
self.reloads.insert(ReloadedValue {
stack: cand.value,
reg,
});
cand.value = reg;
// Create a live range for the new reload.
let affinity = Affinity::Reg(cand.regclass.into());
self.liveness.create_dead(reg, fill, affinity);
self.liveness
.extend_locally(reg, ebb, inst, &self.cur.func.layout);
}
// Rewrite instruction arguments.
//
// Only rewrite those arguments that were identified as candidates. This leaves EBB
// arguments on branches as-is without rewriting them. A spilled EBB argument needs to stay
// spilled because the matching EBB parameter is going to be in the same virtual register
// and therefore the same stack slot as the EBB argument value.
if !self.candidates.is_empty() {
let args = self.cur.func.dfg.inst_args_mut(inst);
while let Some(cand) = self.candidates.pop() {
args[cand.argidx] = cand.value;
}
}
}
// Reload the current candidates for the given copy `inst`.
//
// As an optimization, replace a copy instruction where the argument has been spilled with
// a fill instruction.
fn reload_copy_candidates(&mut self, inst: Inst) {
// Copy instructions can only have one argument.
debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1);
if let Some(cand) = self.candidates.pop() {
self.cur.func.dfg.replace(inst).fill(cand.value);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok);
}
}
// Find reload candidates for `inst` and add them to `self.candidates`.
//
// These are uses of spilled values where the operand constraint requires a register.
fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) {
let args = self.cur.func.dfg.inst_args(inst);
if let Some(constraints) = constraints {
for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
self.candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: op.regclass,
})
}
}
}
// If we only have the fixed arguments, we're done now.
let offset = self.cur.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
if args.len() == offset {
return;
}
let var_args = &args[offset..];
// Handle ABI arguments.
if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
handle_abi_args(
self.candidates,
&self.cur.func.dfg.signatures[sig].params,
var_args,
offset,
self.cur.isa,
self.liveness,
);
} else if self.cur.func.dfg[inst].opcode().is_return() {
handle_abi_args(
self.candidates,
&self.cur.func.signature.returns,
var_args,
offset,
self.cur.isa,
self.liveness,
);
}
}
/// Insert a spill at `pos` and update data structures.
///
/// - Insert `stack = spill reg` at `pos`, and assign an encoding.
/// - Move the `stack` live range starting point to the new instruction.
/// - Extend the `reg` live range to reach the new instruction.
fn insert_spill(&mut self, ebb: Ebb, stack: Value, reg: Value) {
self.cur.ins().with_result(stack).spill(reg);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.move_def_locally(stack, inst);
self.liveness
.extend_locally(reg, ebb, inst, &self.cur.func.layout);
}
}
/// Find reload candidates in the instruction's ABI variable arguments. This handles both
/// return values and call arguments.
fn handle_abi_args(
candidates: &mut Vec<ReloadCandidate>,
abi_types: &[AbiParam],
var_args: &[Value],
offset: usize,
isa: &TargetIsa,
liveness: &Liveness,
) {
debug_assert_eq!(abi_types.len(), var_args.len());
for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
if abi.location.is_reg() {
let lv = liveness.get(arg).expect("Missing live range for ABI arg");
if lv.affinity.is_stack() {
candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: isa.regclass_for_abi_type(abi.value_type),
});
}
}
}
}

Some files were not shown because too many files have changed in this diff.