diff --git a/Cargo.lock b/Cargo.lock index b8c92cbc46..8d7d237b6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -379,6 +379,7 @@ dependencies = [ "gimli", "hashbrown 0.7.1", "log", + "regalloc", "serde", "smallvec", "target-lexicon", @@ -1599,6 +1600,16 @@ dependencies = [ "rust-argon2", ] +[[package]] +name = "regalloc" +version = "0.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ce0cd835fa6e91bbf5d010beee19d0c2e97e4ad5e13c399a31122cfc83bdd6" +dependencies = [ + "log", + "rustc-hash", +] + [[package]] name = "regex" version = "1.3.6" @@ -1663,6 +1674,12 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.2.3" diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 148fcf9327..83219d42e6 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -24,6 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op smallvec = { version = "1.0.0" } thiserror = "1.0.4" byteorder = { version = "1.3.2", default-features = false } +regalloc = "0.0.17" # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary # machine code. Integration tests that need external dependencies can be @@ -33,7 +34,7 @@ byteorder = { version = "1.3.2", default-features = false } cranelift-codegen-meta = { path = "meta", version = "0.62.0" } [features] -default = ["std", "unwind"] +default = ["std", "unwind", "all-arch"] # The "std" feature enables use of libstd. The "core" feature enables use # of some minimal std-like replacement libraries. At least one of these two diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index bad6fd7e79..c94707690a 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -55,9 +55,10 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef}; use crate::binemit; use crate::flowgraph; use crate::ir; -use crate::isa::enc_tables::Encodings; +pub use crate::isa::enc_tables::Encodings; #[cfg(feature = "unwind")] use crate::isa::fde::RegisterMappingError; +use crate::machinst::MachBackend; use crate::regalloc; use crate::result::CodegenResult; use crate::settings; @@ -400,6 +401,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync { ) { // No-op by default } + + /// Get the new-style MachBackend, if this is an adapter around one. 
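+ ///
+ /// A hypothetical usage sketch (not part of this patch; `isa` and `func`
+ /// are assumed to exist at the call site):
+ ///
+ /// ```ignore
+ /// if let Some(backend) = isa.get_mach_backend() {
+ ///     // Bypass the legacy encoding tables and compile with the
+ ///     // new-style backend directly; this returns a
+ ///     // `CodegenResult<MachCompileResult>`.
+ ///     let result = backend.compile_function(func, /* want_disasm = */ false);
+ /// }
+ /// ```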
+ fn get_mach_backend(&self) -> Option<&dyn MachBackend> { + None + } } impl Debug for &dyn TargetIsa { diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 772562b916..2d6651a67e 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -71,6 +71,7 @@ pub mod flowgraph; pub mod ir; pub mod isa; pub mod loop_analysis; +pub mod machinst; pub mod print_errors; pub mod settings; pub mod timing; @@ -90,6 +91,7 @@ mod iterators; mod legalizer; mod licm; mod nan_canonicalization; +mod num_uses; mod partition_slice; mod postopt; mod predicates; diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs new file mode 100644 index 0000000000..7aaa66fe14 --- /dev/null +++ b/cranelift/codegen/src/machinst/abi.rs @@ -0,0 +1,142 @@ +//! ABI definitions. + +use crate::ir; +use crate::ir::StackSlot; +use crate::machinst::*; +use crate::settings; + +use regalloc::{Reg, Set, SpillSlot, VirtualReg, Writable}; + +/// Trait implemented by an object that tracks ABI-related state (e.g., stack +/// layout) and can generate code while emitting the *body* of a function. +pub trait ABIBody { + /// Get the liveins of the function. + fn liveins(&self) -> Set; + + /// Get the liveouts of the function. + fn liveouts(&self) -> Set; + + /// Number of arguments. + fn num_args(&self) -> usize; + + /// Number of return values. + fn num_retvals(&self) -> usize; + + /// Number of stack slots (not spill slots). + fn num_stackslots(&self) -> usize; + + /// Generate an instruction which copies an argument to a destination + /// register. + fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> I; + + /// Generate an instruction which copies a source register to a return + /// value slot. + fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> I; + + /// Generate a return instruction. + fn gen_ret(&self) -> I; + + /// Generate an epilogue placeholder. + fn gen_epilogue_placeholder(&self) -> I; + + // ----------------------------------------------------------------- + // Every function above this line may only be called pre-regalloc. + // Every function below this line may only be called post-regalloc. + // `spillslots()` must be called before any other post-regalloc + // function. + // ---------------------------------------------------------------- + + /// Update with the number of spillslots, post-regalloc. + fn set_num_spillslots(&mut self, slots: usize); + + /// Update with the clobbered registers, post-regalloc. + fn set_clobbered(&mut self, clobbered: Set>); + + /// Load from a stackslot. + fn load_stackslot( + &self, + slot: StackSlot, + offset: usize, + ty: Type, + into_reg: Writable, + ) -> I; + + /// Store to a stackslot. + fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> I; + + /// Load from a spillslot. + fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> I; + + /// Store to a spillslot. + fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> I; + + /// Generate a prologue, post-regalloc. This should include any stack + /// frame or other setup necessary to use the other methods (`load_arg`, + /// `store_retval`, and spillslot accesses.) |self| is mutable so that we + /// can store information in it which will be useful when creating the + /// epilogue. + fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec; + + /// Generate an epilogue, post-regalloc. 
Note that this must generate the + /// actual return instruction (rather than emitting this in the lowering + /// logic), because the epilogue code comes before the return and the two are + /// likely closely related. + fn gen_epilogue(&self, flags: &settings::Flags) -> Vec; + + /// Returns the full frame size for the given function, after prologue emission has run. This + /// comprises the spill space, incoming argument space, alignment padding, etc. + fn frame_size(&self) -> u32; + + /// Get the spill-slot size. + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32; + + /// Generate a spill. + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> I; + + /// Generate a reload (fill). + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> I; +} + +/// Trait implemented by an object that tracks ABI-related state and can +/// generate code while emitting a *call* to a function. +/// +/// An instance of this trait returns information for a *particular* +/// callsite. It will usually be computed from the called function's +/// signature. +/// +/// Unlike `ABIBody` above, methods on this trait are not invoked directly +/// by the machine-independent code. Rather, the machine-specific lowering +/// code will typically create an `ABICall` when creating machine instructions +/// for an IR call instruction inside `lower()`, directly emit the arg and +/// and retval copies, and attach the register use/def info to the call. +/// +/// This trait is thus provided for convenience to the backends. +pub trait ABICall { + /// Get the number of arguments expected. + fn num_args(&self) -> usize; + + /// Save the clobbered registers. + /// Copy an argument value from a source register, prior to the call. + fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> I; + + /// Copy a return value into a destination register, after the call returns. + fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable) -> I; + + /// Pre-adjust the stack, prior to argument copies and call. + fn gen_stack_pre_adjust(&self) -> Vec; + + /// Post-adjust the satck, after call return and return-value copies. + fn gen_stack_post_adjust(&self) -> Vec; + + /// Generate the call itself. + /// + /// The returned instruction should have proper use- and def-sets according + /// to the argument registers, return-value registers, and clobbered + /// registers for this function signature in this ABI. + /// + /// (Arg registers are uses, and retval registers are defs. Clobbered + /// registers are also logically defs, but should never be read; their + /// values are "defined" (to the regalloc) but "undefined" in every other + /// sense.) + fn gen_call(&self) -> Vec; +} diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs new file mode 100644 index 0000000000..3f7c5b7b57 --- /dev/null +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -0,0 +1,123 @@ +//! Adapter for a `MachBackend` to implement the `TargetIsa` trait. + +use crate::binemit; +use crate::ir; +use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa}; +use crate::machinst::*; +use crate::regalloc::{RegDiversions, RegisterSet}; +use crate::settings::Flags; + +use std::borrow::Cow; +use std::fmt; +use target_lexicon::Triple; + +/// A wrapper around a `MachBackend` that provides a `TargetIsa` impl. +pub struct TargetIsaAdapter { + backend: Box, + triple: Triple, +} + +impl TargetIsaAdapter { + /// Create a new `TargetIsa` wrapper around a `MachBackend`. 
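+ ///
+ /// Minimal usage sketch; `MyBackend` and its `new(shared_flags)` constructor
+ /// are hypothetical and not defined in this patch:
+ ///
+ /// ```ignore
+ /// let backend = MyBackend::new(shared_flags);
+ /// let isa = TargetIsaAdapter::new(backend);
+ /// // `isa` now implements `TargetIsa` and exposes the wrapped backend
+ /// // via `isa.get_mach_backend()`.
+ /// ```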
+ pub fn new(backend: B) -> TargetIsaAdapter { + let triple = backend.triple(); + TargetIsaAdapter { + backend: Box::new(backend), + triple, + } + } +} + +impl fmt::Display for TargetIsaAdapter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MachBackend") + } +} + +impl TargetIsa for TargetIsaAdapter { + fn name(&self) -> &'static str { + self.backend.name() + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &Flags { + self.backend.flags() + } + + fn register_info(&self) -> RegInfo { + // Called from function's Display impl, so we need a stub here. + RegInfo { + banks: &[], + classes: &[], + } + } + + fn legal_encodings<'a>( + &'a self, + _func: &'a ir::Function, + _inst: &'a ir::InstructionData, + _ctrl_typevar: ir::Type, + ) -> Encodings<'a> { + panic!("Should not be called when new-style backend is available!") + } + + fn encode( + &self, + _func: &ir::Function, + _inst: &ir::InstructionData, + _ctrl_typevar: ir::Type, + ) -> Result { + panic!("Should not be called when new-style backend is available!") + } + + fn encoding_info(&self) -> EncInfo { + panic!("Should not be called when new-style backend is available!") + } + + fn legalize_signature(&self, _sig: &mut Cow, _current: bool) { + panic!("Should not be called when new-style backend is available!") + } + + fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass { + panic!("Should not be called when new-style backend is available!") + } + + fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet { + panic!("Should not be called when new-style backend is available!") + } + + fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> { + panic!("Should not be called when new-style backend is available!") + } + + #[cfg(feature = "testing_hooks")] + fn emit_inst( + &self, + _func: &ir::Function, + _inst: ir::Inst, + _divert: &mut RegDiversions, + _sink: &mut dyn binemit::CodeSink, + ) { + panic!("Should not be called when new-style backend is available!") + } + + /// Emit a whole function into memory. + fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) { + panic!("Should not be called when new-style backend is available!") + } + + fn get_mach_backend(&self) -> Option<&dyn MachBackend> { + Some(&*self.backend) + } + + fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { + self.backend.unsigned_add_overflow_condition() + } + + fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { + self.backend.unsigned_sub_overflow_condition() + } +} diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs new file mode 100644 index 0000000000..bfd4bf665a --- /dev/null +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -0,0 +1,59 @@ +//! Computation of basic block order in emitted code. + +use crate::machinst::*; + +/// Simple reverse postorder-based block order emission. +/// +/// TODO: use a proper algorithm, such as the bottom-up straight-line-section +/// construction algorithm. 
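+///
+/// Illustrative example: for a diamond CFG with edges 0->1, 0->2, 1->3 and
+/// 2->3, a depth-first postorder from entry block 0 is [3, 1, 2, 0]
+/// (successor-order dependent), so the emitted reverse postorder is
+/// [0, 2, 1, 3]: the entry comes first and every block precedes its
+/// successors along forward edges.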
+struct BlockRPO { + visited: Vec, + postorder: Vec, + deferred_last: Option, +} + +impl BlockRPO { + fn new(vcode: &VCode) -> BlockRPO { + BlockRPO { + visited: vec![false; vcode.num_blocks()], + postorder: vec![], + deferred_last: None, + } + } + + fn visit(&mut self, vcode: &VCode, block: BlockIndex) { + self.visited[block as usize] = true; + for succ in vcode.succs(block) { + if !self.visited[*succ as usize] { + self.visit(vcode, *succ); + } + } + + let (start, end) = &vcode.block_ranges[block as usize]; + for i in *start..*end { + if vcode.insts[i as usize].is_epilogue_placeholder() { + debug_assert!(self.deferred_last.is_none()); + self.deferred_last = Some(block); + return; + } + } + + self.postorder.push(block); + } + + fn rpo(self) -> Vec { + let mut rpo = self.postorder; + rpo.reverse(); + if let Some(block) = self.deferred_last { + rpo.push(block); + } + rpo + } +} + +/// Compute the final block order. +pub fn compute_final_block_order(vcode: &VCode) -> Vec { + let mut rpo = BlockRPO::new(vcode); + rpo.visit(vcode, vcode.entry()); + rpo.rpo() +} diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs new file mode 100644 index 0000000000..458db9ea36 --- /dev/null +++ b/cranelift/codegen/src/machinst/compile.rs @@ -0,0 +1,76 @@ +//! Compilation backend pipeline: optimized IR to VCode / binemit. + +use crate::ir::Function; +use crate::machinst::*; +use crate::settings; +use crate::timing; + +use log::debug; +use regalloc::{allocate_registers, RegAllocAlgorithm}; +use std::env; + +/// Compile the given function down to VCode with allocated registers, ready +/// for binary emission. +pub fn compile( + f: &mut Function, + b: &B, + abi: Box>, + flags: &settings::Flags, +) -> VCode +where + B::MInst: ShowWithRRU, +{ + // This lowers the CL IR. + let mut vcode = Lower::new(f, abi).lower(b); + + let universe = &B::MInst::reg_universe(); + + debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe))); + + // Perform register allocation. + let algorithm = match env::var("REGALLOC") { + Ok(str) => match str.as_str() { + "lsrac" => RegAllocAlgorithm::LinearScanChecked, + "lsra" => RegAllocAlgorithm::LinearScan, + // to wit: btc doesn't mean "bitcoin" here + "btc" => RegAllocAlgorithm::BacktrackingChecked, + _ => RegAllocAlgorithm::Backtracking, + }, + // By default use backtracking, which is the fastest. + Err(_) => RegAllocAlgorithm::Backtracking, + }; + + let result = { + let _tt = timing::regalloc(); + allocate_registers( + &mut vcode, algorithm, universe, /*request_block_annotations=*/ false, + ) + .map_err(|err| { + debug!( + "Register allocation error for vcode\n{}\nError: {:?}", + vcode.show_rru(Some(universe)), + err + ); + err + }) + .expect("register allocation") + }; + + // Reorder vcode into final order and copy out final instruction sequence + // all at once. This also inserts prologues/epilogues. + vcode.replace_insns_from_regalloc(result, flags); + + vcode.remove_redundant_branches(); + + // Do final passes over code to finalize branches. + vcode.finalize_branches(); + + debug!( + "vcode after regalloc: final version:\n{}", + vcode.show_rru(Some(universe)) + ); + + //println!("{}\n", vcode.show_rru(Some(&B::MInst::reg_universe()))); + + vcode +} diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs new file mode 100644 index 0000000000..2165416ebc --- /dev/null +++ b/cranelift/codegen/src/machinst/lower.rs @@ -0,0 +1,723 @@ +//! 
This module implements lowering (instruction selection) from Cranelift IR +//! to machine instructions with virtual registers. This is *almost* the final +//! machine code, except for register allocation. + +use crate::binemit::CodeSink; +use crate::dce::has_side_effect; +use crate::entity::SecondaryMap; +use crate::ir::{ + Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode, + Signature, SourceLoc, Type, Value, ValueDef, +}; +use crate::isa::registers::RegUnit; +use crate::machinst::{ + ABIBody, BlockIndex, MachInst, MachInstEmit, VCode, VCodeBuilder, VCodeInst, +}; +use crate::num_uses::NumUses; + +use regalloc::Function as RegallocFunction; +use regalloc::{RealReg, Reg, RegClass, Set, VirtualReg, Writable}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use log::debug; +use smallvec::SmallVec; +use std::collections::VecDeque; +use std::ops::Range; + +/// A context that machine-specific lowering code can use to emit lowered instructions. This is the +/// view of the machine-independent per-function lowering context that is seen by the machine +/// backend. +pub trait LowerCtx { + /// Get the instdata for a given IR instruction. + fn data(&self, ir_inst: Inst) -> &InstructionData; + /// Get the controlling type for a polymorphic IR instruction. + fn ty(&self, ir_inst: Inst) -> Type; + /// Emit a machine instruction. + fn emit(&mut self, mach_inst: I); + /// Indicate that an IR instruction has been merged, and so one of its + /// uses is gone (replaced by uses of the instruction's inputs). This + /// helps the lowering algorithm to perform on-the-fly DCE, skipping over + /// unused instructions (such as immediates incorporated directly). + fn merged(&mut self, from_inst: Inst); + /// Get the producing instruction, if any, and output number, for the `idx`th input to the + /// given IR instruction + fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)>; + /// Map a Value to its associated writable (probably virtual) Reg. + fn value_to_writable_reg(&self, val: Value) -> Writable; + /// Map a Value to its associated (probably virtual) Reg. + fn value_to_reg(&self, val: Value) -> Reg; + /// Get the `idx`th input to the given IR instruction as a virtual register. + fn input(&self, ir_inst: Inst, idx: usize) -> Reg; + /// Get the `idx`th output of the given IR instruction as a virtual register. + fn output(&self, ir_inst: Inst, idx: usize) -> Writable; + /// Get the number of inputs to the given IR instruction. + fn num_inputs(&self, ir_inst: Inst) -> usize; + /// Get the number of outputs to the given IR instruction. + fn num_outputs(&self, ir_inst: Inst) -> usize; + /// Get the type for an instruction's input. + fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type; + /// Get the type for an instruction's output. + fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type; + /// Get a new temp. + fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable; + /// Get the number of block params. + fn num_bb_params(&self, bb: Block) -> usize; + /// Get the register for a block param. + fn bb_param(&self, bb: Block, idx: usize) -> Reg; + /// Get the register for a return value. + fn retval(&self, idx: usize) -> Writable; + /// Get the target for a call instruction, as an `ExternalName`. + fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName>; + /// Get the signature for a call or call-indirect instruction. 
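+ ///
+ /// Sketch of intended use inside a backend's `lower()` (illustrative;
+ /// `ctx` is the `LowerCtx` passed to the backend and `ir_inst` is the
+ /// call instruction being lowered):
+ ///
+ /// ```ignore
+ /// if let Some(sig) = ctx.call_sig(ir_inst) {
+ ///     let num_rets = sig.returns.len();
+ ///     // Set up `num_rets` return-value registers for the call...
+ /// }
+ /// ```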
+ fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>; + /// Get the symbol name and offset for a symbol_value instruction. + fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)>; + /// Returns the memory flags of a given memory access. + fn memflags(&self, ir_inst: Inst) -> Option; + /// Get the source location for a given instruction. + fn srcloc(&self, ir_inst: Inst) -> SourceLoc; +} + +/// A machine backend. +pub trait LowerBackend { + /// The machine instruction type. + type MInst: VCodeInst; + + /// Lower a single instruction. Instructions are lowered in reverse order. + /// This function need not handle branches; those are always passed to + /// `lower_branch_group` below. + fn lower>(&self, ctx: &mut C, inst: Inst); + + /// Lower a block-terminating group of branches (which together can be seen as one + /// N-way branch), given a vcode BlockIndex for each target. + fn lower_branch_group>( + &self, + ctx: &mut C, + insts: &[Inst], + targets: &[BlockIndex], + fallthrough: Option, + ); +} + +/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence +/// from original Inst to MachInsts. +pub struct Lower<'a, I: VCodeInst> { + // The function to lower. + f: &'a Function, + + // Lowered machine instructions. + vcode: VCodeBuilder, + + // Number of active uses (minus `dec_use()` calls by backend) of each instruction. + num_uses: SecondaryMap, + + // Mapping from `Value` (SSA value in IR) to virtual register. + value_regs: SecondaryMap, + + // Return-value vregs. + retval_regs: Vec, + + // Next virtual register number to allocate. + next_vreg: u32, +} + +fn alloc_vreg( + value_regs: &mut SecondaryMap, + regclass: RegClass, + value: Value, + next_vreg: &mut u32, +) -> VirtualReg { + if value_regs[value].get_index() == 0 { + // default value in map. + let v = *next_vreg; + *next_vreg += 1; + value_regs[value] = Reg::new_virtual(regclass, v); + } + value_regs[value].as_virtual_reg().unwrap() +} + +enum GenerateReturn { + Yes, + No, +} + +impl<'a, I: VCodeInst> Lower<'a, I> { + /// Prepare a new lowering context for the given IR function. + pub fn new(f: &'a Function, abi: Box>) -> Lower<'a, I> { + let mut vcode = VCodeBuilder::new(abi); + + let num_uses = NumUses::compute(f).take_uses(); + + let mut next_vreg: u32 = 1; + + // Default register should never be seen, but the `value_regs` map needs a default and we + // don't want to push `Option` everywhere. All values will be assigned registers by the + // loops over block parameters and instruction results below. + // + // We do not use vreg 0 so that we can detect any unassigned register that leaks through. + let default_register = Reg::new_virtual(RegClass::I32, 0); + let mut value_regs = SecondaryMap::with_default(default_register); + + // Assign a vreg to each value. + for bb in f.layout.blocks() { + for param in f.dfg.block_params(bb) { + let vreg = alloc_vreg( + &mut value_regs, + I::rc_for_type(f.dfg.value_type(*param)), + *param, + &mut next_vreg, + ); + vcode.set_vreg_type(vreg, f.dfg.value_type(*param)); + } + for inst in f.layout.block_insts(bb) { + for result in f.dfg.inst_results(inst) { + let vreg = alloc_vreg( + &mut value_regs, + I::rc_for_type(f.dfg.value_type(*result)), + *result, + &mut next_vreg, + ); + vcode.set_vreg_type(vreg, f.dfg.value_type(*result)); + } + } + } + + // Assign a vreg to each return value. 
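+ // (Return-value vregs are drawn from the same counter as ordinary value
+ // vregs; vreg 0 stays reserved as the "unassigned" sentinel used for the
+ // `value_regs` default above.)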
+ let mut retval_regs = vec![]; + for ret in &f.signature.returns { + let v = next_vreg; + next_vreg += 1; + let regclass = I::rc_for_type(ret.value_type); + let vreg = Reg::new_virtual(regclass, v); + retval_regs.push(vreg); + vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ret.value_type); + } + + Lower { + f, + vcode, + num_uses, + value_regs, + retval_regs, + next_vreg, + } + } + + fn gen_arg_setup(&mut self) { + if let Some(entry_bb) = self.f.layout.entry_block() { + debug!( + "gen_arg_setup: entry BB {} args are:\n{:?}", + entry_bb, + self.f.dfg.block_params(entry_bb) + ); + for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() { + let reg = Writable::from_reg(self.value_regs[*param]); + let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg); + self.vcode.push(insn); + } + } + } + + fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) { + for (i, reg) in self.retval_regs.iter().enumerate() { + let insn = self.vcode.abi().gen_copy_reg_to_retval(i, *reg); + self.vcode.push(insn); + } + let inst = match gen_ret_inst { + GenerateReturn::Yes => self.vcode.abi().gen_ret(), + GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(), + }; + self.vcode.push(inst); + } + + fn find_reachable_bbs(&self) -> SmallVec<[Block; 16]> { + if let Some(entry) = self.f.layout.entry_block() { + let mut ret = SmallVec::new(); + let mut queue = VecDeque::new(); + let mut visited = SecondaryMap::with_default(false); + queue.push_back(entry); + visited[entry] = true; + while !queue.is_empty() { + let b = queue.pop_front().unwrap(); + ret.push(b); + let mut succs: SmallVec<[Block; 16]> = SmallVec::new(); + for inst in self.f.layout.block_insts(b) { + if self.f.dfg[inst].opcode().is_branch() { + succs.extend(branch_targets(self.f, b, inst).into_iter()); + } + } + for succ in succs.into_iter() { + if !visited[succ] { + queue.push_back(succ); + visited[succ] = true; + } + } + } + + ret + } else { + SmallVec::new() + } + } + + /// Lower the function. + pub fn lower>(mut self, backend: &B) -> VCode { + // Find all reachable blocks. + let mut bbs = self.find_reachable_bbs(); + // Work backward (reverse block order, reverse through each block), skipping insns with zero + // uses. + bbs.reverse(); + + // This records a Block-to-BlockIndex map so that branch targets can be resolved. + let mut next_bindex = self.vcode.init_bb_map(&bbs[..]); + + // Allocate a separate BlockIndex for each control-flow instruction so that we can create + // the edge blocks later. Each entry for a control-flow inst is the edge block; the list + // has (cf-inst, edge block, orig block) tuples. + let mut edge_blocks_by_inst: SecondaryMap> = + SecondaryMap::with_default(vec![]); + let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![]; + + debug!("about to lower function: {:?}", self.f); + debug!("bb map: {:?}", self.vcode.blocks_by_bb()); + + for bb in bbs.iter() { + for inst in self.f.layout.block_insts(*bb) { + let op = self.f.dfg[inst].opcode(); + if op.is_branch() { + // Find the original target. + let mut add_succ = |next_bb| { + let edge_block = next_bindex; + next_bindex += 1; + edge_blocks_by_inst[inst].push(edge_block); + edge_blocks.push((inst, edge_block, next_bb)); + }; + for succ in branch_targets(self.f, *bb, inst).into_iter() { + add_succ(succ); + } + } + } + } + + for bb in bbs.iter() { + debug!("lowering bb: {}", bb); + + // If this is a return block, produce the return value setup. 
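+ // (Each block is lowered back-to-front, so the return-value setup and the
+ // return/epilogue placeholder are handled first here even though they
+ // terminate the block.)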
+ let last_insn = self.f.layout.block_insts(*bb).last().unwrap(); + let last_insn_opcode = self.f.dfg[last_insn].opcode(); + if last_insn_opcode.is_return() { + let gen_ret = if last_insn_opcode == Opcode::Return { + GenerateReturn::Yes + } else { + debug_assert!(last_insn_opcode == Opcode::FallthroughReturn); + GenerateReturn::No + }; + self.gen_retval_setup(gen_ret); + self.vcode.end_ir_inst(); + } + + // Find the branches at the end first, and process those, if any. + let mut branches: SmallVec<[Inst; 2]> = SmallVec::new(); + let mut targets: SmallVec<[BlockIndex; 2]> = SmallVec::new(); + + for inst in self.f.layout.block_insts(*bb).rev() { + debug!("lower: inst {}", inst); + if edge_blocks_by_inst[inst].len() > 0 { + branches.push(inst); + for target in edge_blocks_by_inst[inst].iter().rev().cloned() { + targets.push(target); + } + } else { + // We've reached the end of the branches -- process all as a group, first. + if branches.len() > 0 { + let fallthrough = self.f.layout.next_block(*bb); + let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb)); + branches.reverse(); + targets.reverse(); + debug!( + "lower_branch_group: targets = {:?} branches = {:?}", + targets, branches + ); + backend.lower_branch_group( + &mut self, + &branches[..], + &targets[..], + fallthrough, + ); + self.vcode.end_ir_inst(); + branches.clear(); + targets.clear(); + } + + // Only codegen an instruction if it either has a side + // effect, or has at least one use of one of its results. + let num_uses = self.num_uses[inst]; + let side_effect = has_side_effect(self.f, inst); + if side_effect || num_uses > 0 { + backend.lower(&mut self, inst); + self.vcode.end_ir_inst(); + } else { + // If we're skipping the instruction, we need to dec-ref + // its arguments. + for arg in self.f.dfg.inst_args(inst) { + let val = self.f.dfg.resolve_aliases(*arg); + match self.f.dfg.value_def(val) { + ValueDef::Result(src_inst, _) => { + self.dec_use(src_inst); + } + _ => {} + } + } + } + } + } + + // There are possibly some branches left if the block contained only branches. + if branches.len() > 0 { + let fallthrough = self.f.layout.next_block(*bb); + let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb)); + branches.reverse(); + targets.reverse(); + debug!( + "lower_branch_group: targets = {:?} branches = {:?}", + targets, branches + ); + backend.lower_branch_group(&mut self, &branches[..], &targets[..], fallthrough); + self.vcode.end_ir_inst(); + branches.clear(); + targets.clear(); + } + + // If this is the entry block, produce the argument setup. + if Some(*bb) == self.f.layout.entry_block() { + self.gen_arg_setup(); + self.vcode.end_ir_inst(); + } + + let vcode_bb = self.vcode.end_bb(); + debug!("finished building bb: BlockIndex {}", vcode_bb); + debug!("bb_to_bindex map says: {}", self.vcode.bb_to_bindex(*bb)); + assert!(vcode_bb == self.vcode.bb_to_bindex(*bb)); + if Some(*bb) == self.f.layout.entry_block() { + self.vcode.set_entry(vcode_bb); + } + } + + // Now create the edge blocks, with phi lowering (block parameter copies). + for (inst, edge_block, orig_block) in edge_blocks.into_iter() { + debug!( + "creating edge block: inst {}, edge_block {}, orig_block {}", + inst, edge_block, orig_block + ); + + // Create a temporary for each block parameter. 
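+ // First record each parameter's type and register class; actual
+ // temporaries are only materialized below if the source and destination
+ // register sets overlap.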
+ let phi_classes: Vec<(Type, RegClass)> = self + .f + .dfg + .block_params(orig_block) + .iter() + .map(|p| self.f.dfg.value_type(*p)) + .map(|ty| (ty, I::rc_for_type(ty))) + .collect(); + + // FIXME sewardj 2020Feb29: use SmallVec + let mut src_regs = vec![]; + let mut dst_regs = vec![]; + + // Create all of the phi uses (reads) from jump args to temps. + + // Round up all the source and destination regs + for (i, arg) in self.f.dfg.inst_variable_args(inst).iter().enumerate() { + let arg = self.f.dfg.resolve_aliases(*arg); + debug!("jump arg {} is {}", i, arg); + src_regs.push(self.value_regs[arg]); + } + for (i, param) in self.f.dfg.block_params(orig_block).iter().enumerate() { + debug!("bb arg {} is {}", i, param); + dst_regs.push(Writable::from_reg(self.value_regs[*param])); + } + debug_assert!(src_regs.len() == dst_regs.len()); + debug_assert!(phi_classes.len() == dst_regs.len()); + + // If, as is mostly the case, the source and destination register + // sets are non overlapping, then we can copy directly, so as to + // save the register allocator work. + if !Set::::from_vec(src_regs.clone()).intersects(&Set::::from_vec( + dst_regs.iter().map(|r| r.to_reg()).collect(), + )) { + for (dst_reg, (src_reg, (ty, _))) in + dst_regs.iter().zip(src_regs.iter().zip(phi_classes)) + { + self.vcode.push(I::gen_move(*dst_reg, *src_reg, ty)); + } + } else { + // There's some overlap, so play safe and copy via temps. + + let tmp_regs: Vec> = phi_classes + .iter() + .map(|&(ty, rc)| self.tmp(rc, ty)) // borrows `self` mutably. + .collect(); + + debug!("phi_temps = {:?}", tmp_regs); + debug_assert!(tmp_regs.len() == src_regs.len()); + + for (tmp_reg, (src_reg, &(ty, _))) in + tmp_regs.iter().zip(src_regs.iter().zip(phi_classes.iter())) + { + self.vcode.push(I::gen_move(*tmp_reg, *src_reg, ty)); + } + for (dst_reg, (tmp_reg, &(ty, _))) in + dst_regs.iter().zip(tmp_regs.iter().zip(phi_classes.iter())) + { + self.vcode.push(I::gen_move(*dst_reg, tmp_reg.to_reg(), ty)); + } + } + + // Create the unconditional jump to the original target block. + self.vcode + .push(I::gen_jump(self.vcode.bb_to_bindex(orig_block))); + + // End the IR inst and block. (We lower this as if it were one IR instruction so that + // we can emit machine instructions in forward order.) + self.vcode.end_ir_inst(); + let blocknum = self.vcode.end_bb(); + assert!(blocknum == edge_block); + } + + // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode. + self.vcode.build() + } + + /// Reduce the use-count of an IR instruction. Use this when, e.g., isel incorporates the + /// computation of an input instruction directly, so that input instruction has one + /// fewer use. + fn dec_use(&mut self, ir_inst: Inst) { + assert!(self.num_uses[ir_inst] > 0); + self.num_uses[ir_inst] -= 1; + debug!( + "incref: ir_inst {} now has {} uses", + ir_inst, self.num_uses[ir_inst] + ); + } + + /// Increase the use-count of an IR instruction. Use this when, e.g., isel incorporates + /// the computation of an input instruction directly, so that input instruction's + /// inputs are now used directly by the merged instruction. + fn inc_use(&mut self, ir_inst: Inst) { + self.num_uses[ir_inst] += 1; + debug!( + "decref: ir_inst {} now has {} uses", + ir_inst, self.num_uses[ir_inst] + ); + } +} + +impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> { + /// Get the instdata for a given IR instruction. 
+ fn data(&self, ir_inst: Inst) -> &InstructionData { + &self.f.dfg[ir_inst] + } + + /// Get the controlling type for a polymorphic IR instruction. + fn ty(&self, ir_inst: Inst) -> Type { + self.f.dfg.ctrl_typevar(ir_inst) + } + + /// Emit a machine instruction. + fn emit(&mut self, mach_inst: I) { + self.vcode.push(mach_inst); + } + + /// Indicate that a merge has occurred. + fn merged(&mut self, from_inst: Inst) { + debug!("merged: inst {}", from_inst); + // First, inc-ref all inputs of `from_inst`, because they are now used + // directly by `into_inst`. + for arg in self.f.dfg.inst_args(from_inst) { + let arg = self.f.dfg.resolve_aliases(*arg); + match self.f.dfg.value_def(arg) { + ValueDef::Result(src_inst, _) => { + debug!(" -> inc-reffing src inst {}", src_inst); + self.inc_use(src_inst); + } + _ => {} + } + } + // Then, dec-ref the merged instruction itself. It still retains references + // to its arguments (inc-ref'd above). If its refcount has reached zero, + // it will be skipped during emission and its args will be dec-ref'd at that + // time. + self.dec_use(from_inst); + } + + /// Get the producing instruction, if any, and output number, for the `idx`th input to the + /// given IR instruction. + fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)> { + let val = self.f.dfg.inst_args(ir_inst)[idx]; + let val = self.f.dfg.resolve_aliases(val); + match self.f.dfg.value_def(val) { + ValueDef::Result(src_inst, result_idx) => Some((src_inst, result_idx)), + _ => None, + } + } + + /// Map a Value to its associated writable (probably virtual) Reg. + fn value_to_writable_reg(&self, val: Value) -> Writable { + let val = self.f.dfg.resolve_aliases(val); + Writable::from_reg(self.value_regs[val]) + } + + /// Map a Value to its associated (probably virtual) Reg. + fn value_to_reg(&self, val: Value) -> Reg { + let val = self.f.dfg.resolve_aliases(val); + self.value_regs[val] + } + + /// Get the `idx`th input to the given IR instruction as a virtual register. + fn input(&self, ir_inst: Inst, idx: usize) -> Reg { + let val = self.f.dfg.inst_args(ir_inst)[idx]; + let val = self.f.dfg.resolve_aliases(val); + self.value_to_reg(val) + } + + /// Get the `idx`th output of the given IR instruction as a virtual register. + fn output(&self, ir_inst: Inst, idx: usize) -> Writable { + let val = self.f.dfg.inst_results(ir_inst)[idx]; + self.value_to_writable_reg(val) + } + + /// Get a new temp. + fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable { + let v = self.next_vreg; + self.next_vreg += 1; + let vreg = Reg::new_virtual(rc, v); + self.vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ty); + Writable::from_reg(vreg) + } + + /// Get the number of inputs for the given IR instruction. + fn num_inputs(&self, ir_inst: Inst) -> usize { + self.f.dfg.inst_args(ir_inst).len() + } + + /// Get the number of outputs for the given IR instruction. + fn num_outputs(&self, ir_inst: Inst) -> usize { + self.f.dfg.inst_results(ir_inst).len() + } + + /// Get the type for an instruction's input. + fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type { + let val = self.f.dfg.inst_args(ir_inst)[idx]; + let val = self.f.dfg.resolve_aliases(val); + self.f.dfg.value_type(val) + } + + /// Get the type for an instruction's output. + fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type { + self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx]) + } + + /// Get the number of block params. 
+ fn num_bb_params(&self, bb: Block) -> usize { + self.f.dfg.block_params(bb).len() + } + + /// Get the register for a block param. + fn bb_param(&self, bb: Block, idx: usize) -> Reg { + let val = self.f.dfg.block_params(bb)[idx]; + self.value_regs[val] + } + + /// Get the register for a return value. + fn retval(&self, idx: usize) -> Writable { + Writable::from_reg(self.retval_regs[idx]) + } + + /// Get the target for a call instruction, as an `ExternalName`. + fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName> { + match &self.f.dfg[ir_inst] { + &InstructionData::Call { func_ref, .. } + | &InstructionData::FuncAddr { func_ref, .. } => { + let funcdata = &self.f.dfg.ext_funcs[func_ref]; + Some(&funcdata.name) + } + _ => None, + } + } + /// Get the signature for a call or call-indirect instruction. + fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature> { + match &self.f.dfg[ir_inst] { + &InstructionData::Call { func_ref, .. } => { + let funcdata = &self.f.dfg.ext_funcs[func_ref]; + Some(&self.f.dfg.signatures[funcdata.signature]) + } + &InstructionData::CallIndirect { sig_ref, .. } => Some(&self.f.dfg.signatures[sig_ref]), + _ => None, + } + } + + /// Get the symbol name and offset for a symbol_value instruction. + fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)> { + match &self.f.dfg[ir_inst] { + &InstructionData::UnaryGlobalValue { global_value, .. } => { + let gvdata = &self.f.global_values[global_value]; + match gvdata { + &GlobalValueData::Symbol { + ref name, + ref offset, + .. + } => { + let offset = offset.bits(); + Some((name, offset)) + } + _ => None, + } + } + _ => None, + } + } + + /// Returns the memory flags of a given memory access. + fn memflags(&self, ir_inst: Inst) -> Option { + match &self.f.dfg[ir_inst] { + &InstructionData::Load { flags, .. } + | &InstructionData::LoadComplex { flags, .. } + | &InstructionData::Store { flags, .. } + | &InstructionData::StoreComplex { flags, .. } => Some(flags), + _ => None, + } + } + + /// Get the source location for a given instruction. + fn srcloc(&self, ir_inst: Inst) -> SourceLoc { + self.f.srclocs[ir_inst] + } +} + +fn branch_targets(f: &Function, block: Block, inst: Inst) -> SmallVec<[Block; 16]> { + let mut ret = SmallVec::new(); + if f.dfg[inst].opcode() == Opcode::Fallthrough { + ret.push(f.layout.next_block(block).unwrap()); + } else { + match &f.dfg[inst] { + &InstructionData::Jump { destination, .. } + | &InstructionData::Branch { destination, .. } + | &InstructionData::BranchInt { destination, .. } + | &InstructionData::BranchIcmp { destination, .. } + | &InstructionData::BranchFloat { destination, .. } => { + ret.push(destination); + } + &InstructionData::BranchTable { + destination, table, .. + } => { + ret.push(destination); + for dest in f.jump_tables[table].as_slice() { + ret.push(*dest); + } + } + _ => {} + } + } + ret +} diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs new file mode 100644 index 0000000000..93c9126b32 --- /dev/null +++ b/cranelift/codegen/src/machinst/mod.rs @@ -0,0 +1,288 @@ +//! This module exposes the machine-specific backend definition pieces. +//! +//! The MachInst infrastructure is the compiler backend, from CLIF +//! (ir::Function) to machine code. The purpose of this infrastructure is, at a +//! high level, to do instruction selection/lowering (to machine instructions), +//! register allocation, and then perform all the fixups to branches, constant +//! 
data references, etc., needed to actually generate machine code. +//! +//! The container for machine instructions, at various stages of construction, +//! is the `VCode` struct. We refer to a sequence of machine instructions organized +//! into basic blocks as "vcode". This is short for "virtual-register code", though +//! it's a bit of a misnomer because near the end of the pipeline, vcode has all +//! real registers. Nevertheless, the name is catchy and we like it. +//! +//! The compilation pipeline, from an `ir::Function` (already optimized as much as +//! you like by machine-independent optimization passes) onward, is as follows. +//! (N.B.: though we show the VCode separately at each stage, the passes +//! mutate the VCode in place; these are not separate copies of the code.) +//! +//! | ir::Function (SSA IR, machine-independent opcodes) +//! | | +//! | | [lower] +//! | | +//! | VCode (machine instructions: +//! | | - mostly virtual registers. +//! | | - cond branches in two-target form. +//! | | - branch targets are block indices. +//! | | - in-memory constants held by insns, +//! | | with unknown offsets. +//! | | - critical edges (actually all edges) +//! | | are split.) +//! | | [regalloc] +//! | | +//! | VCode (machine instructions: +//! | | - all real registers. +//! | | - new instruction sequence returned +//! | | out-of-band in RegAllocResult. +//! | | - instruction sequence has spills, +//! | | reloads, and moves inserted. +//! | | - other invariants same as above.) +//! | | +//! | | [preamble/postamble] +//! | | +//! | VCode (machine instructions: +//! | | - stack-frame size known. +//! | | - out-of-band instruction sequence +//! | | has preamble prepended to entry +//! | | block, and postamble injected before +//! | | every return instruction. +//! | | - all symbolic stack references to +//! | | stackslots and spillslots are resolved +//! | | to concrete FP-offset mem addresses.) +//! | | [block/insn ordering] +//! | | +//! | VCode (machine instructions: +//! | | - vcode.final_block_order is filled in. +//! | | - new insn sequence from regalloc is +//! | | placed back into vcode and block +//! | | boundaries are updated.) +//! | | [redundant branch/block +//! | | removal] +//! | | +//! | VCode (machine instructions: +//! | | - all blocks that were just an +//! | | unconditional branch are removed.) +//! | | +//! | | [branch finalization +//! | | (fallthroughs)] +//! | | +//! | VCode (machine instructions: +//! | | - all branches are in lowered one- +//! | | target form, but targets are still +//! | | block indices.) +//! | | +//! | | [branch finalization +//! | | (offsets)] +//! | | +//! | VCode (machine instructions: +//! | | - all branch offsets from start of +//! | | function are known, and all branches +//! | | have resolved-offset targets.) +//! | | +//! | | [MemArg finalization] +//! | | +//! | VCode (machine instructions: +//! | | - all MemArg references to the constant +//! | | pool are replaced with offsets. +//! | | - all constant-pool data is collected +//! | | in the VCode.) +//! | | +//! | | [binary emission] +//! | | +//! | Vec (machine code!) +//! 
| + +#![allow(unused_imports)] + +use crate::binemit::{ + CodeInfo, CodeOffset, CodeSink, MemoryCodeSink, RelocSink, StackmapSink, TrapSink, +}; +use crate::entity::EntityRef; +use crate::entity::SecondaryMap; +use crate::ir::condcodes::IntCC; +use crate::ir::ValueLocations; +use crate::ir::{DataFlowGraph, Function, Inst, Opcode, Type, Value}; +use crate::isa::RegUnit; +use crate::result::CodegenResult; +use crate::settings::Flags; +use crate::HashMap; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::fmt::Debug; +use core::iter::Sum; +use regalloc::Map as RegallocMap; +use regalloc::RegUsageCollector; +use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; +use smallvec::SmallVec; +use std::hash::Hash; +use std::string::String; +use target_lexicon::Triple; + +pub mod lower; +pub use lower::*; +pub mod vcode; +pub use vcode::*; +pub mod compile; +pub use compile::*; +pub mod blockorder; +pub use blockorder::*; +pub mod abi; +pub use abi::*; +pub mod pp; +pub use pp::*; +pub mod sections; +pub use sections::*; +pub mod adapter; +pub use adapter::*; + +/// A machine instruction. +pub trait MachInst: Clone + Debug { + /// Return the registers referenced by this machine instruction along with + /// the modes of reference (use, def, modify). + fn get_regs(&self, collector: &mut RegUsageCollector); + + /// Map virtual registers to physical registers using the given virt->phys + /// maps corresponding to the program points prior to, and after, this instruction. + fn map_regs( + &mut self, + pre_map: &RegallocMap, + post_map: &RegallocMap, + ); + + /// If this is a simple move, return the (source, destination) tuple of registers. + fn is_move(&self) -> Option<(Writable, Reg)>; + + /// Is this a terminator (branch or ret)? If so, return its type + /// (ret/uncond/cond) and target if applicable. + fn is_term<'a>(&'a self) -> MachTerminator<'a>; + + /// Returns true if the instruction is an epilogue placeholder. + fn is_epilogue_placeholder(&self) -> bool; + + /// Generate a move. + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self; + + /// Generate a zero-length no-op. + fn gen_zero_len_nop() -> Self; + + /// Possibly operate on a value directly in a spill-slot rather than a + /// register. Useful if the machine has register-memory instruction forms + /// (e.g., add directly from or directly to memory), like x86. + fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option; + + /// Determine a register class to store the given CraneLift type. + fn rc_for_type(ty: Type) -> RegClass; + + /// Generate a jump to another target. Used during lowering of + /// control flow. + fn gen_jump(target: BlockIndex) -> Self; + + /// Generate a NOP. The `preferred_size` parameter allows the caller to + /// request a NOP of that size, or as close to it as possible. The machine + /// backend may return a NOP whose binary encoding is smaller than the + /// preferred size, but must not return a NOP that is larger. However, + /// the instruction must have a nonzero size. + fn gen_nop(preferred_size: usize) -> Self; + + /// Rewrite block targets using the block-target map. + fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]); + + /// Finalize branches once the block order (fallthrough) is known. + fn with_fallthrough_block(&mut self, fallthrough_block: Option); + + /// Update instruction once block offsets are known. These offsets are + /// relative to the beginning of the function. `targets` is indexed by + /// BlockIndex. 
+ fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]); + + /// Get the register universe for this backend. + fn reg_universe() -> RealRegUniverse; + + /// Align a basic block offset (from start of function). By default, no + /// alignment occurs. + fn align_basic_block(offset: CodeOffset) -> CodeOffset { + offset + } +} + +/// Describes a block terminator (not call) in the vcode, when its branches +/// have not yet been finalized (so a branch may have two targets). +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum MachTerminator<'a> { + /// Not a terminator. + None, + /// A return instruction. + Ret, + /// An unconditional branch to another block. + Uncond(BlockIndex), + /// A conditional branch to one of two other blocks. + Cond(BlockIndex, BlockIndex), + /// An indirect branch with known possible targets. + Indirect(&'a [BlockIndex]), +} + +/// A trait describing the ability to encode a MachInst into binary machine code. +pub trait MachInstEmit { + /// Emit the instruction. + fn emit(&self, code: &mut O); +} + +/// The result of a `MachBackend::compile_function()` call. Contains machine +/// code (as bytes) and a disassembly, if requested. +pub struct MachCompileResult { + /// Machine code. + pub sections: MachSections, + /// Size of stack frame, in bytes. + pub frame_size: u32, + /// Disassembly, if requested. + pub disasm: Option, +} + +impl MachCompileResult { + /// Get a `CodeInfo` describing section sizes from this compilation result. + pub fn code_info(&self) -> CodeInfo { + let code_size = self.sections.total_size(); + CodeInfo { + code_size, + jumptables_size: 0, + rodata_size: 0, + total_size: code_size, + } + } +} + +/// Top-level machine backend trait, which wraps all monomorphized code and +/// allows a virtual call from the machine-independent `Function::compile()`. +pub trait MachBackend { + /// Compile the given function. Consumes the function. + fn compile_function( + &self, + func: Function, + want_disasm: bool, + ) -> CodegenResult; + + /// Return flags for this backend. + fn flags(&self) -> &Flags; + + /// Return triple for this backend. + fn triple(&self) -> Triple; + + /// Return name for this backend. + fn name(&self) -> &'static str; + + /// Return the register universe for this backend. + fn reg_universe(&self) -> RealRegUniverse; + + /// Machine-specific condcode info needed by TargetIsa. + fn unsigned_add_overflow_condition(&self) -> IntCC { + // TODO: this is what x86 specifies. Is this right for arm64? + IntCC::UnsignedLessThan + } + + /// Machine-specific condcode info needed by TargetIsa. + fn unsigned_sub_overflow_condition(&self) -> IntCC { + // TODO: this is what x86 specifies. Is this right for arm64? + IntCC::UnsignedLessThan + } +} diff --git a/cranelift/codegen/src/machinst/pp.rs b/cranelift/codegen/src/machinst/pp.rs new file mode 100644 index 0000000000..40e7c1b842 --- /dev/null +++ b/cranelift/codegen/src/machinst/pp.rs @@ -0,0 +1,66 @@ +//! Pretty-printing for machine code (virtual-registerized or final). + +use regalloc::{RealRegUniverse, Reg, Writable}; + +use std::fmt::Debug; +use std::hash::Hash; +use std::string::{String, ToString}; + +// FIXME: Should this go into regalloc.rs instead? + +/// A trait for printing instruction bits and pieces, with the the ability to +/// take a contextualising RealRegUniverse that is used to give proper names to +/// registers. +pub trait ShowWithRRU { + /// Return a string that shows the implementing object in context of the + /// given `RealRegUniverse`, if provided. 
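+ ///
+ /// For example, `compile.rs` prints a whole `VCode` this way (sketch):
+ ///
+ /// ```ignore
+ /// let universe = B::MInst::reg_universe();
+ /// debug!("vcode:\n{}", vcode.show_rru(Some(&universe)));
+ /// ```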
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String; + + /// The same as |show_rru|, but with an optional hint giving a size in + /// bytes. Its interpretation is object-dependent, and it is intended to + /// pass around enough information to facilitate printing sub-parts of + /// real registers correctly. Objects may ignore size hints that are + /// irrelevant to them. + fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { + // Default implementation is to ignore the hint. + self.show_rru(mb_rru) + } +} + +impl ShowWithRRU for Reg { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + if self.is_real() { + if let Some(rru) = mb_rru { + let reg_ix = self.get_index(); + if reg_ix < rru.regs.len() { + return rru.regs[reg_ix].1.to_string(); + } else { + // We have a real reg which isn't listed in the universe. + // Per the regalloc.rs interface requirements, this is + // Totally Not Allowed. Print it generically anyway, so + // we have something to debug. + return format!("!!{:?}!!", self); + } + } + } + // The reg is virtual, or we have no universe. Be generic. + format!("%{:?}", self) + } + + fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { + // For the specific case of Reg, we demand not to have a size hint, + // since interpretation of the size is target specific, but this code + // is used by all targets. + panic!("Reg::show_rru_sized: impossible to implement"); + } +} + +impl ShowWithRRU for Writable { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.to_reg().show_rru(mb_rru) + } + + fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { + self.to_reg().show_rru_sized(mb_rru, size) + } +} diff --git a/cranelift/codegen/src/machinst/sections.rs b/cranelift/codegen/src/machinst/sections.rs new file mode 100644 index 0000000000..3e387239d0 --- /dev/null +++ b/cranelift/codegen/src/machinst/sections.rs @@ -0,0 +1,351 @@ +//! In-memory representation of compiled machine code, in multiple sections +//! (text, constant pool / rodata, etc). Emission occurs into multiple sections +//! simultaneously, so we buffer the result in memory and hand off to the +//! caller at the end of compilation. + +use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, RelocSink, StackmapSink, TrapSink}; +use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; + +use alloc::vec::Vec; + +/// A collection of sections with defined start-offsets. +pub struct MachSections { + /// Sections, in offset order. + pub sections: Vec, +} + +impl MachSections { + /// New, empty set of sections. + pub fn new() -> MachSections { + MachSections { sections: vec![] } + } + + /// Add a section with a known offset and size. Returns the index. + pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize { + let idx = self.sections.len(); + self.sections.push(MachSection::new(start, length)); + idx + } + + /// Mutably borrow the given section by index. + pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection { + &mut self.sections[idx] + } + + /// Get mutable borrows of two sections simultaneously. Used during + /// instruction emission to provide references to the .text and .rodata + /// (constant pool) sections. 
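+ ///
+ /// Sketch of the intended use (the offsets and lengths, `code_len` and
+ /// `rodata_len`, are illustrative):
+ ///
+ /// ```ignore
+ /// let mut sections = MachSections::new();
+ /// let text_idx = sections.add_section(0, code_len);
+ /// let rodata_idx = sections.add_section(code_len, rodata_len);
+ /// let (text, rodata) = sections.two_sections(text_idx, rodata_idx);
+ /// ```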
+ pub fn two_sections<'a>( + &'a mut self, + idx1: usize, + idx2: usize, + ) -> (&'a mut MachSection, &'a mut MachSection) { + assert!(idx1 < idx2); + assert!(idx1 < self.sections.len()); + assert!(idx2 < self.sections.len()); + let (first, rest) = self.sections.split_at_mut(idx2); + (&mut first[idx1], &mut rest[0]) + } + + /// Emit this set of sections to a set of sinks for the code, + /// relocations, traps, and stackmap. + pub fn emit(&self, sink: &mut CS) { + // N.B.: we emit every section into the .text section as far as + // the `CodeSink` is concerned; we do not bother to segregate + // the contents into the actual program text, the jumptable and the + // rodata (constant pool). This allows us to generate code assuming + // that these will not be relocated relative to each other, and avoids + // having to designate each section as belonging in one of the three + // fixed categories defined by `CodeSink`. If this becomes a problem + // later (e.g. because of memory permissions or similar), we can + // add this designation and segregate the output; take care, however, + // to add the appropriate relocations in this case. + + for section in &self.sections { + if section.data.len() > 0 { + while sink.offset() < section.start_offset { + sink.put1(0); + } + section.emit(sink); + } + } + sink.begin_jumptables(); + sink.begin_rodata(); + sink.end_codegen(); + } + + /// Get the total required size for these sections. + pub fn total_size(&self) -> CodeOffset { + if self.sections.len() == 0 { + 0 + } else { + // Find the last non-empty section. + self.sections + .iter() + .rev() + .find(|s| s.data.len() > 0) + .map(|s| s.cur_offset_from_start()) + .unwrap_or(0) + } + } +} + +/// An abstraction over MachSection and MachSectionSize: some +/// receiver of section data. +pub trait MachSectionOutput { + /// Get the current offset from the start of all sections. + fn cur_offset_from_start(&self) -> CodeOffset; + + /// Get the start offset of this section. + fn start_offset(&self) -> CodeOffset; + + /// Add 1 byte to the section. + fn put1(&mut self, _: u8); + + /// Add 2 bytes to the section. + fn put2(&mut self, value: u16) { + self.put1((value & 0xff) as u8); + self.put1(((value >> 8) & 0xff) as u8); + } + + /// Add 4 bytes to the section. + fn put4(&mut self, value: u32) { + self.put1((value & 0xff) as u8); + self.put1(((value >> 8) & 0xff) as u8); + self.put1(((value >> 16) & 0xff) as u8); + self.put1(((value >> 24) & 0xff) as u8); + } + + /// Add 8 bytes to the section. + fn put8(&mut self, value: u64) { + self.put1((value & 0xff) as u8); + self.put1(((value >> 8) & 0xff) as u8); + self.put1(((value >> 16) & 0xff) as u8); + self.put1(((value >> 24) & 0xff) as u8); + self.put1(((value >> 32) & 0xff) as u8); + self.put1(((value >> 40) & 0xff) as u8); + self.put1(((value >> 48) & 0xff) as u8); + self.put1(((value >> 56) & 0xff) as u8); + } + + /// Add a slice of bytes to the section. + fn put_data(&mut self, data: &[u8]); + + /// Add a relocation at the current offset. + fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend); + + /// Add a trap record at the current offset. + fn add_trap(&mut self, loc: SourceLoc, code: TrapCode); + + /// Add a call return address record at the current offset. + fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode); + + /// Align up to the given alignment. 
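+ ///
+ /// For example, `section.align_to(16)` pads with zero bytes until the
+ /// current offset from the start of all sections is a multiple of 16; the
+ /// alignment must be a power of two.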
+ fn align_to(&mut self, align_to: CodeOffset) { + assert!(align_to.is_power_of_two()); + while self.cur_offset_from_start() & (align_to - 1) != 0 { + self.put1(0); + } + } +} + +/// A section of output to be emitted to a CodeSink / RelocSink in bulk. +/// Multiple sections may be created with known start offsets in advance; the +/// usual use-case is to create the .text (code) and .rodata (constant pool) at +/// once, after computing the length of the code, so that constant references +/// can use known offsets as instructions are emitted. +pub struct MachSection { + /// The starting offset of this section. + pub start_offset: CodeOffset, + /// The limit of this section, defined by the start of the next section. + pub length_limit: CodeOffset, + /// The section contents, as raw bytes. + pub data: Vec, + /// Any relocations referring to this section. + pub relocs: Vec, + /// Any trap records referring to this section. + pub traps: Vec, + /// Any call site record referring to this section. + pub call_sites: Vec, +} + +impl MachSection { + /// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`. + pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection { + MachSection { + start_offset, + length_limit, + data: vec![], + relocs: vec![], + traps: vec![], + call_sites: vec![], + } + } + + /// Emit this section to the CodeSink and other associated sinks. The + /// current offset of the CodeSink must match the starting offset of this + /// section. + pub fn emit(&self, sink: &mut CS) { + assert!(sink.offset() == self.start_offset); + + let mut next_reloc = 0; + let mut next_trap = 0; + let mut next_call_site = 0; + for (idx, byte) in self.data.iter().enumerate() { + if next_reloc < self.relocs.len() { + let reloc = &self.relocs[next_reloc]; + if reloc.offset == idx as CodeOffset { + sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend); + next_reloc += 1; + } + } + if next_trap < self.traps.len() { + let trap = &self.traps[next_trap]; + if trap.offset == idx as CodeOffset { + sink.trap(trap.code, trap.srcloc); + next_trap += 1; + } + } + if next_call_site < self.call_sites.len() { + let call_site = &self.call_sites[next_call_site]; + if call_site.ret_addr == idx as CodeOffset { + sink.add_call_site(call_site.opcode, call_site.srcloc); + next_call_site += 1; + } + } + sink.put1(*byte); + } + } +} + +impl MachSectionOutput for MachSection { + fn cur_offset_from_start(&self) -> CodeOffset { + self.start_offset + self.data.len() as CodeOffset + } + + fn start_offset(&self) -> CodeOffset { + self.start_offset + } + + fn put1(&mut self, value: u8) { + assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit); + self.data.push(value); + } + + fn put_data(&mut self, data: &[u8]) { + assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit); + self.data.extend_from_slice(data); + } + + fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) { + let name = name.clone(); + self.relocs.push(MachReloc { + offset: self.data.len() as CodeOffset, + srcloc, + kind, + name, + addend, + }); + } + + fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) { + self.traps.push(MachTrap { + offset: self.data.len() as CodeOffset, + srcloc, + code, + }); + } + + fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) { + self.call_sites.push(MachCallSite { + ret_addr: self.data.len() as CodeOffset, + srcloc, + opcode, + }); + } +} + +/// A MachSectionOutput 
implementation that records only size. +pub struct MachSectionSize { + /// The starting offset of this section. + pub start_offset: CodeOffset, + /// The current offset of this section. + pub offset: CodeOffset, +} + +impl MachSectionSize { + /// Create a new size-counting dummy section. + pub fn new(start_offset: CodeOffset) -> MachSectionSize { + MachSectionSize { + start_offset, + offset: start_offset, + } + } + + /// Return the size this section would take if emitted with a real sink. + pub fn size(&self) -> CodeOffset { + self.offset - self.start_offset + } +} + +impl MachSectionOutput for MachSectionSize { + fn cur_offset_from_start(&self) -> CodeOffset { + // All size-counting sections conceptually start at offset 0; this doesn't + // matter when counting code size. + self.offset + } + + fn start_offset(&self) -> CodeOffset { + self.start_offset + } + + fn put1(&mut self, _: u8) { + self.offset += 1; + } + + fn put_data(&mut self, data: &[u8]) { + self.offset += data.len() as CodeOffset; + } + + fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {} + + fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {} + + fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {} +} + +/// A relocation resulting from a compilation. +pub struct MachReloc { + /// The offset at which the relocation applies, *relative to the + /// containing section*. + pub offset: CodeOffset, + /// The original source location. + pub srcloc: SourceLoc, + /// The kind of relocation. + pub kind: Reloc, + /// The external symbol / name to which this relocation refers. + pub name: ExternalName, + /// The addend to add to the symbol value. + pub addend: i64, +} + +/// A trap record resulting from a compilation. +pub struct MachTrap { + /// The offset at which the trap instruction occurs, *relative to the + /// containing section*. + pub offset: CodeOffset, + /// The original source location. + pub srcloc: SourceLoc, + /// The trap code. + pub code: TrapCode, +} + +/// A call site record resulting from a compilation. +pub struct MachCallSite { + /// The offset of the call's return address, *relative to the containing section*. + pub ret_addr: CodeOffset, + /// The original source location. + pub srcloc: SourceLoc, + /// The call's opcode. + pub opcode: Opcode, +} diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs new file mode 100644 index 0000000000..64b1a4012a --- /dev/null +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -0,0 +1,738 @@ +//! This implements the VCode container: a CFG of Insts that have been lowered. +//! +//! VCode is virtual-register code. An instruction in VCode is almost a machine +//! instruction; however, its register slots can refer to virtual registers in +//! addition to real machine registers. +//! +//! VCode is structured with traditional basic blocks, and +//! each block must be terminated by an unconditional branch (one target), a +//! conditional branch (two targets), or a return (no targets). Note that this +//! slightly differs from the machine code of most ISAs: in most ISAs, a +//! conditional branch has one target (and the not-taken case falls through). +//! However, we expect that machine backends will elide branches to the following +//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / +//! branch-uncond pair if *both* targets are not fallthrough. This allows us to +//! play with layout prior to final binary emission, as well, if we want. +//! +//! 
See the main module comment in `mod.rs` for more details on the VCode-based +//! backend pipeline. + +use crate::binemit::Reloc; +use crate::ir; +use crate::machinst::*; +use crate::settings; + +use regalloc::Function as RegallocFunction; +use regalloc::Set as RegallocSet; +use regalloc::{BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use log::debug; +use smallvec::SmallVec; +use std::fmt; +use std::iter; +use std::ops::Index; +use std::string::String; + +/// Index referring to an instruction in VCode. +pub type InsnIndex = u32; +/// Index referring to a basic block in VCode. +pub type BlockIndex = u32; + +/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be +/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. +pub trait VCodeInst: MachInst + MachInstEmit + MachInstEmit {} +impl + MachInstEmit> VCodeInst for I {} + +/// A function in "VCode" (virtualized-register code) form, after lowering. +/// This is essentially a standard CFG of basic blocks, where each basic block +/// consists of lowered instructions produced by the machine-specific backend. +pub struct VCode { + /// Function liveins. + liveins: RegallocSet, + + /// Function liveouts. + liveouts: RegallocSet, + + /// VReg IR-level types. + vreg_types: Vec, + + /// Lowered machine instructions in order corresponding to the original IR. + pub insts: Vec, + + /// Entry block. + entry: BlockIndex, + + /// Block instruction indices. + pub block_ranges: Vec<(InsnIndex, InsnIndex)>, + + /// Block successors: index range in the successor-list below. + block_succ_range: Vec<(usize, usize)>, + + /// Block successor lists, concatenated into one Vec. The `block_succ_range` + /// list of tuples above gives (start, end) ranges within this list that + /// correspond to each basic block's successors. + block_succs: Vec, + + /// Block indices by IR block. + block_by_bb: SecondaryMap, + + /// IR block for each VCode Block. The length of this Vec will likely be + /// less than the total number of Blocks, because new Blocks (for edge + /// splits, for example) are appended during lowering. + bb_by_block: Vec, + + /// Order of block IDs in final generated code. + final_block_order: Vec, + + /// Final block offsets. Computed during branch finalization and used + /// during emission. + final_block_offsets: Vec, + + /// Size of code, accounting for block layout / alignment. + code_size: CodeOffset, + + /// ABI object. + abi: Box>, +} + +/// A builder for a VCode function body. This builder is designed for the +/// lowering approach that we take: we traverse basic blocks in forward +/// (original IR) order, but within each basic block, we generate code from +/// bottom to top; and within each IR instruction that we visit in this reverse +/// order, we emit machine instructions in *forward* order again. +/// +/// Hence, to produce the final instructions in proper order, we perform two +/// swaps. First, the machine instructions (`I` instances) are produced in +/// forward order for an individual IR instruction. Then these are *reversed* +/// and concatenated to `bb_insns` at the end of the IR instruction lowering. +/// The `bb_insns` vec will thus contain all machine instructions for a basic +/// block, in reverse order. Finally, when we're done with a basic block, we +/// reverse the whole block's vec of instructions again, and concatenate onto +/// the VCode's insts. +pub struct VCodeBuilder { + /// In-progress VCode. 
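// A small worked example of the ordering described above (illustrative only):
// suppose an IR block contains insts A then B, and lowering produces machine
// insts [a1, a2] for A and [b1, b2] for B. Visiting bottom-to-top, B's insts
// are pushed in forward order into `ir_inst_insns`, then popped (reversed)
// onto `bb_insns`, giving [b2, b1]; after A is processed, `bb_insns` is
// [b2, b1, a2, a1]. Ending the block pops (reverses) once more into the
// VCode, yielding the final forward order [a1, a2, b1, b2].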
+ vcode: VCode, + + /// Current basic block instructions, in reverse order (because blocks are + /// built bottom-to-top). + bb_insns: SmallVec<[I; 32]>, + + /// Current IR-inst instructions, in forward order. + ir_inst_insns: SmallVec<[I; 4]>, + + /// Start of succs for the current block in the concatenated succs list. + succ_start: usize, +} + +impl VCodeBuilder { + /// Create a new VCodeBuilder. + pub fn new(abi: Box>) -> VCodeBuilder { + let vcode = VCode::new(abi); + VCodeBuilder { + vcode, + bb_insns: SmallVec::new(), + ir_inst_insns: SmallVec::new(), + succ_start: 0, + } + } + + /// Access the ABI object. + pub fn abi(&mut self) -> &mut dyn ABIBody { + &mut *self.vcode.abi + } + + /// Set the type of a VReg. + pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { + while self.vcode.vreg_types.len() <= vreg.get_index() { + self.vcode.vreg_types.push(ir::types::I8); // Default type. + } + self.vcode.vreg_types[vreg.get_index()] = ty; + } + + /// Return the underlying bb-to-BlockIndex map. + pub fn blocks_by_bb(&self) -> &SecondaryMap { + &self.vcode.block_by_bb + } + + /// Initialize the bb-to-BlockIndex map. Returns the first free + /// BlockIndex. + pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex { + let mut bindex: BlockIndex = 0; + for bb in blocks.iter() { + self.vcode.block_by_bb[*bb] = bindex; + self.vcode.bb_by_block.push(*bb); + bindex += 1; + } + bindex + } + + /// Get the BlockIndex for an IR block. + pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex { + self.vcode.block_by_bb[bb] + } + + /// Set the current block as the entry block. + pub fn set_entry(&mut self, block: BlockIndex) { + self.vcode.entry = block; + } + + /// End the current IR instruction. Must be called after pushing any + /// instructions and prior to ending the basic block. + pub fn end_ir_inst(&mut self) { + while let Some(i) = self.ir_inst_insns.pop() { + self.bb_insns.push(i); + } + } + + /// End the current basic block. Must be called after emitting vcode insts + /// for IR insts and prior to ending the function (building the VCode). + pub fn end_bb(&mut self) -> BlockIndex { + assert!(self.ir_inst_insns.is_empty()); + let block_num = self.vcode.block_ranges.len() as BlockIndex; + // Push the instructions. + let start_idx = self.vcode.insts.len() as InsnIndex; + while let Some(i) = self.bb_insns.pop() { + self.vcode.insts.push(i); + } + let end_idx = self.vcode.insts.len() as InsnIndex; + // Add the instruction index range to the list of blocks. + self.vcode.block_ranges.push((start_idx, end_idx)); + // End the successors list. + let succ_end = self.vcode.block_succs.len(); + self.vcode + .block_succ_range + .push((self.succ_start, succ_end)); + self.succ_start = succ_end; + + block_num + } + + /// Push an instruction for the current BB and current IR inst within the BB. + pub fn push(&mut self, insn: I) { + match insn.is_term() { + MachTerminator::None | MachTerminator::Ret => {} + MachTerminator::Uncond(target) => { + self.vcode.block_succs.push(target); + } + MachTerminator::Cond(true_branch, false_branch) => { + self.vcode.block_succs.push(true_branch); + self.vcode.block_succs.push(false_branch); + } + MachTerminator::Indirect(targets) => { + for target in targets { + self.vcode.block_succs.push(*target); + } + } + } + self.ir_inst_insns.push(insn); + } + + /// Build the final VCode. 
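// Illustrative driver sketch, not part of the patch (`make_abi`,
// `lower_ir_inst`, and `func` are stand-ins): a backend's lowering pass would
// drive this builder roughly as follows before calling `build()`:
//
//     let mut b = VCodeBuilder::new(make_abi(func));
//     let blocks: Vec<ir::Block> = func.layout.blocks().collect();
//     b.init_bb_map(&blocks);
//     if let Some(entry) = func.layout.entry_block() {
//         b.set_entry(b.bb_to_bindex(entry));
//     }
//     for bb in &blocks {
//         // IR insts visited bottom-to-top; machine insts pushed forward.
//         for ir_inst in func.layout.block_insts(*bb).collect::<Vec<_>>().into_iter().rev() {
//             for mach_inst in lower_ir_inst(func, ir_inst) {
//                 b.push(mach_inst);
//             }
//             b.end_ir_inst();
//         }
//         b.end_bb();
//     }
//     let vcode = b.build();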
+ pub fn build(self) -> VCode { + assert!(self.ir_inst_insns.is_empty()); + assert!(self.bb_insns.is_empty()); + self.vcode + } +} + +fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> { + let v = indices + .iter() + .map(|iix| iix.get() as usize) + .chain(iter::once(len)) + .collect::>(); + v.windows(2).map(|p| (p[0], p[1])).collect() +} + +fn is_redundant_move(insn: &I) -> bool { + if let Some((to, from)) = insn.is_move() { + to.to_reg() == from + } else { + false + } +} + +fn is_trivial_jump_block(vcode: &VCode, block: BlockIndex) -> Option { + let range = vcode.block_insns(BlockIx::new(block)); + + debug!( + "is_trivial_jump_block: block {} has len {}", + block, + range.len() + ); + + if range.len() != 1 { + return None; + } + let insn = range.first(); + + debug!( + " -> only insn is: {:?} with terminator {:?}", + vcode.get_insn(insn), + vcode.get_insn(insn).is_term() + ); + + match vcode.get_insn(insn).is_term() { + MachTerminator::Uncond(target) => Some(target), + _ => None, + } +} + +impl VCode { + /// New empty VCode. + fn new(abi: Box>) -> VCode { + VCode { + liveins: abi.liveins(), + liveouts: abi.liveouts(), + vreg_types: vec![], + insts: vec![], + entry: 0, + block_ranges: vec![], + block_succ_range: vec![], + block_succs: vec![], + block_by_bb: SecondaryMap::with_default(0), + bb_by_block: vec![], + final_block_order: vec![], + final_block_offsets: vec![], + code_size: 0, + abi, + } + } + + /// Get the IR-level type of a VReg. + pub fn vreg_type(&self, vreg: VirtualReg) -> Type { + self.vreg_types[vreg.get_index()] + } + + /// Get the entry block. + pub fn entry(&self) -> BlockIndex { + self.entry + } + + /// Get the number of blocks. Block indices will be in the range `0 .. + /// (self.num_blocks() - 1)`. + pub fn num_blocks(&self) -> usize { + self.block_ranges.len() + } + + /// Stack frame size for the full function's body. + pub fn frame_size(&self) -> u32 { + self.abi.frame_size() + } + + /// Get the successors for a block. + pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] { + let (start, end) = self.block_succ_range[block as usize]; + &self.block_succs[start..end] + } + + /// Take the results of register allocation, with a sequence of + /// instructions including spliced fill/reload/move instructions, and replace + /// the VCode with them. + pub fn replace_insns_from_regalloc( + &mut self, + result: RegAllocResult, + flags: &settings::Flags, + ) { + self.final_block_order = compute_final_block_order(self); + + // Record the spillslot count and clobbered registers for the ABI/stack + // setup code. + self.abi.set_num_spillslots(result.num_spill_slots as usize); + self.abi + .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r))); + + // We want to move instructions over in final block order, using the new + // block-start map given by the regalloc. + let block_ranges: Vec<(usize, usize)> = + block_ranges(result.target_map.elems(), result.insns.len()); + let mut final_insns = vec![]; + let mut final_block_ranges = vec![(0, 0); self.num_blocks()]; + + for block in &self.final_block_order { + let (start, end) = block_ranges[*block as usize]; + let final_start = final_insns.len() as InsnIndex; + + if *block == self.entry { + // Start with the prologue. + final_insns.extend(self.abi.gen_prologue(flags).into_iter()); + } + + for i in start..end { + let insn = &result.insns[i]; + + // Elide redundant moves at this point (we only know what is + // redundant once registers are allocated). 
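// (A move is "redundant" when register allocation has assigned the same
// physical register to both its source and destination, for example a
// `v3 := copy v7` that becomes `x5 := copy x5`; this is exactly what
// `is_redundant_move` above checks via `to.to_reg() == from`.)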
+ if is_redundant_move(insn) { + continue; + } + + // Whenever encountering a return instruction, replace it + // with the epilogue. + let is_ret = insn.is_term() == MachTerminator::Ret; + if is_ret { + final_insns.extend(self.abi.gen_epilogue(flags).into_iter()); + } else { + final_insns.push(insn.clone()); + } + } + + let final_end = final_insns.len() as InsnIndex; + final_block_ranges[*block as usize] = (final_start, final_end); + } + + self.insts = final_insns; + self.block_ranges = final_block_ranges; + } + + /// Removes redundant branches, rewriting targets to point directly to the + /// ultimate block at the end of a chain of trivial one-target jumps. + pub fn remove_redundant_branches(&mut self) { + // For each block, compute the actual target block, looking through up to one + // block with single-target jumps (this will remove empty edge blocks inserted + // by phi-lowering). + let block_rewrites: Vec = (0..self.num_blocks() as u32) + .map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix)) + .collect(); + let mut refcounts: Vec = vec![0; self.num_blocks()]; + + debug!( + "remove_redundant_branches: block_rewrites = {:?}", + block_rewrites + ); + + refcounts[self.entry as usize] = 1; + + for block in 0..self.num_blocks() as u32 { + for insn in self.block_insns(BlockIx::new(block)) { + self.get_insn_mut(insn) + .with_block_rewrites(&block_rewrites[..]); + match self.get_insn(insn).is_term() { + MachTerminator::Uncond(bix) => { + refcounts[bix as usize] += 1; + } + MachTerminator::Cond(bix1, bix2) => { + refcounts[bix1 as usize] += 1; + refcounts[bix2 as usize] += 1; + } + MachTerminator::Indirect(blocks) => { + for block in blocks { + refcounts[*block as usize] += 1; + } + } + _ => {} + } + } + } + + let deleted: Vec = refcounts.iter().map(|r| *r == 0).collect(); + + let block_order = std::mem::replace(&mut self.final_block_order, vec![]); + self.final_block_order = block_order + .into_iter() + .filter(|b| !deleted[*b as usize]) + .collect(); + + // Rewrite successor information based on the block-rewrite map. + for succ in &mut self.block_succs { + let new_succ = block_rewrites[*succ as usize]; + *succ = new_succ; + } + } + + /// Mutate branch instructions to (i) lower two-way condbrs to one-way, + /// depending on fallthrough; and (ii) use concrete offsets. + pub fn finalize_branches(&mut self) + where + I: MachInstEmit, + { + // Compute fallthrough block, indexed by block. + let num_final_blocks = self.final_block_order.len(); + let mut block_fallthrough: Vec> = vec![None; self.num_blocks()]; + for i in 0..(num_final_blocks - 1) { + let from = self.final_block_order[i]; + let to = self.final_block_order[i + 1]; + block_fallthrough[from as usize] = Some(to); + } + + // Pass over VCode instructions and finalize two-way branches into + // one-way branches with fallthrough. + for block in 0..self.num_blocks() { + let next_block = block_fallthrough[block]; + let (start, end) = self.block_ranges[block]; + + for iix in start..end { + let insn = &mut self.insts[iix as usize]; + insn.with_fallthrough_block(next_block); + } + } + + // Compute block offsets. 
+ let mut code_section = MachSectionSize::new(0); + let mut block_offsets = vec![0; self.num_blocks()]; + for block in &self.final_block_order { + code_section.offset = I::align_basic_block(code_section.offset); + block_offsets[*block as usize] = code_section.offset; + let (start, end) = self.block_ranges[*block as usize]; + for iix in start..end { + self.insts[iix as usize].emit(&mut code_section); + } + } + + // We now have the section layout. + self.final_block_offsets = block_offsets; + self.code_size = code_section.size(); + + // Update branches with known block offsets. This looks like the + // traversal above, but (i) does not update block_offsets, rather uses + // it (so forward references are now possible), and (ii) mutates the + // instructions. + let mut code_section = MachSectionSize::new(0); + for block in &self.final_block_order { + code_section.offset = I::align_basic_block(code_section.offset); + let (start, end) = self.block_ranges[*block as usize]; + for iix in start..end { + self.insts[iix as usize] + .with_block_offsets(code_section.offset, &self.final_block_offsets[..]); + self.insts[iix as usize].emit(&mut code_section); + } + } + } + + /// Emit the instructions to a list of sections. + pub fn emit(&self) -> MachSections + where + I: MachInstEmit, + { + let mut sections = MachSections::new(); + let code_idx = sections.add_section(0, self.code_size); + let code_section = sections.get_section(code_idx); + + for block in &self.final_block_order { + let new_offset = I::align_basic_block(code_section.cur_offset_from_start()); + while new_offset > code_section.cur_offset_from_start() { + // Pad with NOPs up to the aligned block offset. + let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize); + nop.emit(code_section); + } + assert_eq!(code_section.cur_offset_from_start(), new_offset); + + let (start, end) = self.block_ranges[*block as usize]; + for iix in start..end { + self.insts[iix as usize].emit(code_section); + } + } + + sections + } + + /// Get the IR block for a BlockIndex, if one exists. + pub fn bindex_to_bb(&self, block: BlockIndex) -> Option { + if (block as usize) < self.bb_by_block.len() { + Some(self.bb_by_block[block as usize]) + } else { + None + } + } +} + +impl RegallocFunction for VCode { + type Inst = I; + + fn insns(&self) -> &[I] { + &self.insts[..] + } + + fn insns_mut(&mut self) -> &mut [I] { + &mut self.insts[..] 
+ } + + fn get_insn(&self, insn: InstIx) -> &I { + &self.insts[insn.get() as usize] + } + + fn get_insn_mut(&mut self, insn: InstIx) -> &mut I { + &mut self.insts[insn.get() as usize] + } + + fn blocks(&self) -> Range { + Range::new(BlockIx::new(0), self.block_ranges.len()) + } + + fn entry_block(&self) -> BlockIx { + BlockIx::new(self.entry) + } + + fn block_insns(&self, block: BlockIx) -> Range { + let (start, end) = self.block_ranges[block.get() as usize]; + Range::new(InstIx::new(start), (end - start) as usize) + } + + fn block_succs(&self, block: BlockIx) -> Vec { + let (start, end) = self.block_succ_range[block.get() as usize]; + self.block_succs[start..end] + .iter() + .cloned() + .map(BlockIx::new) + .collect() + } + + fn is_ret(&self, insn: InstIx) -> bool { + match self.insts[insn.get() as usize].is_term() { + MachTerminator::Ret => true, + _ => false, + } + } + + fn get_regs(insn: &I, collector: &mut RegUsageCollector) { + insn.get_regs(collector) + } + + fn map_regs( + insn: &mut I, + pre_map: &RegallocMap, + post_map: &RegallocMap, + ) { + insn.map_regs(pre_map, post_map); + } + + fn is_move(&self, insn: &I) -> Option<(Writable, Reg)> { + insn.is_move() + } + + fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 { + let ty = self.vreg_type(vreg); + self.abi.get_spillslot_size(regclass, ty) + } + + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I { + let ty = self.vreg_type(vreg); + self.abi.gen_spill(to_slot, from_reg, ty) + } + + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, vreg: VirtualReg) -> I { + let ty = self.vreg_type(vreg); + self.abi.gen_reload(to_reg, from_slot, ty) + } + + fn gen_move(&self, to_reg: Writable, from_reg: RealReg, vreg: VirtualReg) -> I { + let ty = self.vreg_type(vreg); + I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty) + } + + fn gen_zero_len_nop(&self) -> I { + I::gen_zero_len_nop() + } + + fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option { + insn.maybe_direct_reload(reg, slot) + } + + fn func_liveins(&self) -> RegallocSet { + self.liveins.clone() + } + + fn func_liveouts(&self) -> RegallocSet { + self.liveouts.clone() + } +} + +// N.B.: Debug impl assumes that VCode has already been through all compilation +// passes, and so has a final block order and offsets. + +impl fmt::Debug for VCode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "VCode_Debug {{")?; + writeln!(f, " Entry block: {}", self.entry)?; + writeln!(f, " Final block order: {:?}", self.final_block_order)?; + + for block in 0..self.num_blocks() { + writeln!(f, "Block {}:", block,)?; + for succ in self.succs(block as BlockIndex) { + writeln!(f, " (successor: Block {})", succ)?; + } + let (start, end) = self.block_ranges[block]; + writeln!(f, " (instruction range: {} .. {})", start, end)?; + for inst in start..end { + writeln!(f, " Inst {}: {:?}", inst, self.insts[inst as usize])?; + } + } + + writeln!(f, "}}")?; + Ok(()) + } +} + +// Pretty-printing with `RealRegUniverse` context. +impl ShowWithRRU for VCode { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + use crate::alloc::string::ToString; + use std::fmt::Write; + + // Calculate an order in which to display the blocks. This is the same + // as final_block_order, but also includes blocks which are in the + // representation but not in final_block_order. 
+ let mut display_order = Vec::::new(); + // First display blocks in |final_block_order| + for bix in &self.final_block_order { + assert!((*bix as usize) < self.num_blocks()); + display_order.push(*bix as usize); + } + // Now also take care of those not listed in |final_block_order|. + // This is quadratic, but it's also debug-only code. + for bix in 0..self.num_blocks() { + if display_order.contains(&bix) { + continue; + } + display_order.push(bix); + } + + let mut s = String::new(); + s = s + &format!("VCode_ShowWithRRU {{{{"); + s = s + &"\n".to_string(); + s = s + &format!(" Entry block: {}", self.entry); + s = s + &"\n".to_string(); + s = s + &format!(" Final block order: {:?}", self.final_block_order); + s = s + &"\n".to_string(); + + for i in 0..self.num_blocks() { + let block = display_order[i]; + + let omitted = + (if !self.final_block_order.is_empty() && i >= self.final_block_order.len() { + "** OMITTED **" + } else { + "" + }) + .to_string(); + + s = s + &format!("Block {}: {}", block, omitted); + s = s + &"\n".to_string(); + if let Some(bb) = self.bindex_to_bb(block as BlockIndex) { + s = s + &format!(" (original IR block: {})\n", bb); + } + for succ in self.succs(block as BlockIndex) { + s = s + &format!(" (successor: Block {})", succ); + s = s + &"\n".to_string(); + } + let (start, end) = self.block_ranges[block]; + s = s + &format!(" (instruction range: {} .. {})", start, end); + s = s + &"\n".to_string(); + for inst in start..end { + s = s + &format!( + " Inst {}: {}", + inst, + self.insts[inst as usize].show_rru(mb_rru) + ); + s = s + &"\n".to_string(); + } + } + + s = s + &format!("}}}}"); + s = s + &"\n".to_string(); + + s + } +} diff --git a/cranelift/codegen/src/num_uses.rs b/cranelift/codegen/src/num_uses.rs new file mode 100644 index 0000000000..c08741020c --- /dev/null +++ b/cranelift/codegen/src/num_uses.rs @@ -0,0 +1,68 @@ +//! A pass that computes the number of uses of any given instruction. + +#![allow(dead_code)] +#![allow(unused_imports)] + +use crate::cursor::{Cursor, FuncCursor}; +use crate::dce::has_side_effect; +use crate::entity::SecondaryMap; +use crate::ir::dfg::ValueDef; +use crate::ir::instructions::InstructionData; +use crate::ir::Value; +use crate::ir::{DataFlowGraph, Function, Inst, Opcode}; + +/// Auxiliary data structure that counts the number of uses of any given +/// instruction in a Function. This is used during instruction selection +/// to essentially do incremental DCE: when an instruction is no longer +/// needed because its computation has been isel'd into another machine +/// instruction at every use site, we can skip it. +#[derive(Clone, Debug)] +pub struct NumUses { + uses: SecondaryMap, +} + +impl NumUses { + fn new() -> NumUses { + NumUses { + uses: SecondaryMap::with_default(0), + } + } + + /// Compute the NumUses analysis result for a function. + pub fn compute(func: &Function) -> NumUses { + let mut uses = NumUses::new(); + for bb in func.layout.blocks() { + for inst in func.layout.block_insts(bb) { + for arg in func.dfg.inst_args(inst) { + let v = func.dfg.resolve_aliases(*arg); + uses.add_value(&func.dfg, v); + } + } + } + uses + } + + fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) { + match dfg.value_def(v) { + ValueDef::Result(inst, _) => { + self.uses[inst] += 1; + } + _ => {} + } + } + + /// How many times is an instruction used? + pub fn use_count(&self, i: Inst) -> usize { + self.uses[i] as usize + } + + /// Is an instruction used at all? 
+    pub fn is_used(&self, i: Inst) -> bool {
+        self.use_count(i) > 0
+    }
+
+    /// Take the complete uses map, consuming this analysis result.
+    pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
+        self.uses
+    }
+}
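// Illustrative only, not part of the patch: a sketch of how a caller elsewhere
// in the crate might consume this analysis. `count_unused_results` is a made-up
// helper; a real instruction-selection pass would also have to check for side
// effects (see the struct documentation above) before skipping an unused
// instruction.

use crate::ir::Function;
use crate::num_uses::NumUses;

/// Count the instructions in `func` whose results are never referenced.
fn count_unused_results(func: &Function) -> usize {
    let num_uses = NumUses::compute(func);
    let mut unused = 0;
    for block in func.layout.blocks() {
        for inst in func.layout.block_insts(block) {
            if !num_uses.is_used(inst) {
                unused += 1;
            }
        }
    }
    unused
}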