Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. This switchover has implications for
core VCode/MachInst types and the lowering pass.

Overall, this change improves both compile time and the runtime performance of
generated code, as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
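At a high level, the switchover changes `machinst::compile` to hand the lowered
VCode straight to regalloc2 and to return regalloc2's `Output` alongside the
`VCode` (see the `machinst::compile` hunk below). A minimal sketch of that call,
assuming only a container implementing `regalloc2::Function`; the helper name
`allocate` is illustrative and not part of this PR:

```rust
use regalloc2::{Function, MachineEnv, Output, RegallocOptions};

// Illustrative helper (not in the PR): run regalloc2 over an already-lowered
// code container. The caller later rewrites instructions into their final
// form using the returned `Output`, as the new `compile` does.
fn allocate<F: Function>(code: &F, machine_env: &MachineEnv) -> Output {
    let mut options = RegallocOptions::default();
    // Mirror the PR: turn on regalloc2's verbose logging only when trace
    // logging is enabled.
    options.verbose_log = log::log_enabled!(log::Level::Trace);
    regalloc2::run(code, machine_env, &options).expect("register allocation")
}
```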
Commit a0318f36f0 (parent bfae6384aa)
Author: Chris Fallin
Date: 2022-04-14 10:28:21 -07:00 (committed by GitHub)
181 changed files with 16887 additions and 21587 deletions


@@ -5,7 +5,6 @@ use crate::ir::{Signature, StackSlot};
use crate::isa::CallConv;
use crate::machinst::*;
use crate::settings;
use regalloc::{Reg, Set, SpillSlot, Writable};
use smallvec::SmallVec;
/// A small vector of instructions (with some reasonable size); appropriate for
@@ -42,12 +41,6 @@ pub trait ABICallee {
/// Get the calling convention implemented by this ABI object.
fn call_conv(&self) -> CallConv;
/// Get the liveins of the function.
fn liveins(&self) -> Set<RealReg>;
/// Get the liveouts of the function.
fn liveouts(&self) -> Set<RealReg>;
/// Number of arguments.
fn num_args(&self) -> usize;
@@ -106,7 +99,7 @@ pub trait ABICallee {
fn set_num_spillslots(&mut self, slots: usize);
/// Update with the clobbered registers, post-regalloc.
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>);
/// Get the address of a stackslot.
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Self::I;


@@ -125,6 +125,7 @@
use super::abi::*;
use crate::binemit::StackMap;
use crate::fx::FxHashSet;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot};
use crate::machinst::*;
@@ -132,7 +133,6 @@ use crate::settings;
use crate::CodegenResult;
use crate::{ir, isa};
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
use std::marker::PhantomData;
@@ -257,16 +257,6 @@ pub enum ArgsOrRets {
Rets,
}
/// Is an instruction returned by an ABI machine-specific backend a safepoint,
/// or not?
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstIsSafepoint {
/// The instruction is a safepoint.
Yes,
/// The instruction is not a safepoint.
No,
}
/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
@@ -319,11 +309,7 @@ pub trait ABIMachineSpec {
/// Returns word register class.
fn word_reg_class() -> RegClass {
match Self::word_bits() {
32 => RegClass::I32,
64 => RegClass::I64,
_ => unreachable!(),
}
RegClass::Int
}
/// Returns required stack alignment in bytes.
@@ -366,7 +352,7 @@ pub trait ABIMachineSpec {
) -> Self::I;
/// Generate a return instruction.
fn gen_ret() -> Self::I;
fn gen_ret(rets: Vec<Reg>) -> Self::I;
/// Generate an "epilogue placeholder" instruction, recognized by lowering
/// when using the Baldrdash ABI.
@@ -442,7 +428,7 @@ pub trait ABIMachineSpec {
/// contains the registers in a sorted order.
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>>;
/// Determine whether it is necessary to generate the usual frame-setup
@@ -466,7 +452,7 @@ pub trait ABIMachineSpec {
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
clobbered_callee_saves: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>);
@@ -478,7 +464,7 @@ pub trait ABIMachineSpec {
fn gen_clobber_restore(
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]>;
@@ -493,7 +479,7 @@ pub trait ABIMachineSpec {
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
callee_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
) -> SmallVec<[Self::I; 2]>;
/// Generate a memcpy invocation. Used to set up struct args. May clobber
/// caller-save registers; we only memcpy before we start to set up args for
@@ -530,6 +516,7 @@ pub trait ABIMachineSpec {
}
/// ABI information shared between body (callee) and caller.
#[derive(Clone)]
struct ABISig {
/// Argument locations (regs or stack slots). Stack offsets are relative to
/// SP on entry to function.
@@ -604,7 +591,7 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// Stack size to be reserved for outgoing arguments.
outgoing_args_size: u32,
/// Clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
clobbered: Vec<Writable<RealReg>>,
/// Total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Storage allocated for the fixed part of the stack frame. This is
@@ -655,24 +642,13 @@ fn get_special_purpose_param_register(
let idx = f.signature.special_param_index(purpose)?;
match &abi.args[idx] {
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
&ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
_ => None,
},
_ => None,
}
}
fn ty_from_class(class: RegClass) -> Type {
match class {
RegClass::I32 => I32,
RegClass::I64 => I64,
RegClass::F32 => F32,
RegClass::F64 => F64,
RegClass::V128 => I8X16,
_ => panic!("Unknown regclass: {:?}", class),
}
}
impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance.
pub fn new(
@@ -739,7 +715,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
stackslots,
stackslots_size: stack_offset,
outgoing_args_size: 0,
clobbered: Set::empty(),
clobbered: vec![],
spillslots: None,
fixed_frame_storage_size: 0,
total_frame_size: None,
@@ -961,34 +937,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.sig.call_conv
}
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for arg in &self.sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for ret in &self.sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}
fn num_args(&self) -> usize {
self.sig.args.len()
}
@@ -1019,7 +967,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
// Extension mode doesn't matter (we're copying out, not in; we
// ignore high bits by convention).
&ABIArgSlot::Reg { reg, ty, .. } => {
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
insts.push(M::gen_move(*into_reg, reg.into(), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
insts.push(M::gen_load_stack(
@@ -1069,20 +1017,21 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
match &self.sig.rets[idx] {
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(from_regs.len(), slots.len());
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
match slot {
&ABIArgSlot::Reg {
reg, ty, extension, ..
} => {
let from_bits = ty_bits(ty) as u8;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
let reg: Writable<Reg> = Writable::from_reg(Reg::from(reg));
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(reg.to_reg()),
reg,
from_reg.to_reg(),
signed,
from_bits,
@@ -1090,11 +1039,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
));
}
_ => {
ret.push(M::gen_move(
Writable::from_reg(reg.to_reg()),
from_reg.to_reg(),
ty,
));
ret.push(M::gen_move(reg, from_reg.to_reg(), ty));
}
};
}
@@ -1118,7 +1063,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
assert_eq!(M::word_reg_class(), from_reg.to_reg().class());
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(from_reg.to_reg()),
@@ -1166,7 +1111,22 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
fn gen_ret(&self) -> Self::I {
M::gen_ret()
let mut rets = vec![];
for ret in &self.sig.rets {
match ret {
ABIArg::Slots { slots, .. } => {
for slot in slots {
match slot {
ABIArgSlot::Reg { reg, .. } => rets.push(Reg::from(*reg)),
_ => {}
}
}
}
_ => {}
}
}
M::gen_ret(rets)
}
fn gen_epilogue_placeholder(&self) -> Self::I {
@@ -1177,7 +1137,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.spillslots = Some(slots);
}
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>) {
self.clobbered = clobbered;
}
@@ -1198,7 +1158,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
into_regs: ValueRegs<Writable<Reg>>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1214,7 +1174,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
from_regs: ValueRegs<Reg>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1245,7 +1205,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let first_spillslot_word =
((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize;
for &slot in slots {
let slot = slot.get() as usize;
let slot = slot.index();
bits[first_spillslot_word + slot] = true;
}
@@ -1347,7 +1307,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
insts.extend(M::gen_epilogue_frame_restore(&self.flags));
}
insts.push(M::gen_ret());
// This `ret` doesn't need any return registers attached
// because we are post-regalloc and don't need to
// represent the implicit uses anymore.
insts.push(M::gen_ret(vec![]));
}
log::trace!("Epilogue: {:?}", insts);
@@ -1368,19 +1331,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I {
let ty = ty_from_class(from_reg.to_reg().get_class());
self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg()))
let ty = Self::I::canonical_type_for_rc(Reg::from(from_reg).class());
self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg)))
.into_iter()
.next()
.unwrap()
}
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I {
let ty = ty_from_class(to_reg.to_reg().get_class());
let ty = Self::I::canonical_type_for_rc(to_reg.to_reg().class());
self.load_spillslot(
from_slot,
ty,
writable_value_regs(ValueRegs::one(to_reg.to_reg().to_reg())),
writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))),
)
.into_iter()
.next()
@@ -1390,13 +1353,13 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
// Compute uses: all arg regs.
let mut uses = Vec::new();
let mut uses = FxHashSet::default();
for arg in &sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
uses.push(reg.to_reg());
uses.insert(Reg::from(reg));
}
_ => {}
}
@@ -1405,13 +1368,15 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
let mut defs: FxHashSet<_> = M::get_regs_clobbered_by_call(sig.call_conv)
.into_iter()
.collect();
for ret in &sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
defs.push(Writable::from_reg(reg.to_reg()));
defs.insert(Writable::from_reg(Reg::from(reg)));
}
_ => {}
}
@@ -1419,6 +1384,11 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
}
let mut uses = uses.into_iter().collect::<Vec<_>>();
let mut defs = defs.into_iter().collect::<Vec<_>>();
uses.sort_unstable();
defs.sort_unstable();
(uses, defs)
}
@@ -1567,14 +1537,14 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
} => {
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, reg.get_class());
assert_eq!(word_rc, reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
ctx.emit(M::gen_extend(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
signed,
ty_bits(ty) as u8,
@@ -1582,7 +1552,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
));
} else {
ctx.emit(M::gen_move(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
ty,
));
@@ -1597,7 +1567,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
let mut ty = ty;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, from_reg.get_class());
assert_eq!(word_rc, from_reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
@@ -1680,7 +1650,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
// Extension mode doesn't matter because we're copying out, not in,
// and we ignore high bits in our own registers by convention.
&ABIArgSlot::Reg { reg, ty, .. } => {
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
let ret_area_base = self.sig.stack_arg_space;
@@ -1716,7 +1686,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg()));
}
let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap();
for (is_safepoint, inst) in M::gen_call(
for inst in M::gen_call(
&self.dest,
uses,
defs,
@@ -1727,10 +1697,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
)
.into_iter()
{
match is_safepoint {
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
InstIsSafepoint::No => ctx.emit(inst),
}
ctx.emit(inst);
}
}
}


@@ -127,6 +127,9 @@ pub enum LoweredBlock {
/// to the next, i.e., corresponding to the included edge-block. This
/// will be an instruction in `block`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
@@ -138,6 +141,9 @@ pub enum LoweredBlock {
/// The edge (jump) instruction corresponding to the included
/// edge-block. This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The original CLIF block included in this lowered block.
block: Block,
},
@@ -150,6 +156,9 @@ pub enum LoweredBlock {
/// The edge (jump) instruction corresponding to this edge's transition.
/// This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
@@ -168,29 +177,34 @@ impl LoweredBlock {
}
/// The associated in-edge, if any.
#[cfg(test)]
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::EdgeAndOrig {
pred,
edge_inst,
block,
..
} => Some((pred, edge_inst, block)),
_ => None,
}
}
/// The associated out-edge, if any. Also includes edge-only blocks.
#[cfg(test)]
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::OrigAndEdge {
block,
edge_inst,
succ,
..
} => Some((block, edge_inst, succ)),
LoweredBlock::Edge {
pred,
edge_inst,
succ,
..
} => Some((pred, edge_inst, succ)),
_ => None,
}
@@ -207,15 +221,17 @@ impl BlockLoweringOrder {
let mut block_out_count = SecondaryMap::with_default(0);
// Cache the block successors to avoid re-examining branches below.
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new();
let mut block_succ_range = SecondaryMap::with_default((0, 0));
let mut fallthrough_return_block = None;
for block in f.layout.blocks() {
let block_succ_start = block_succs.len();
let mut succ_idx = 0;
visit_block_succs(f, block, |inst, succ| {
block_out_count[block] += 1;
block_in_count[succ] += 1;
block_succs.push((inst, succ));
block_succs.push((inst, succ_idx, succ));
succ_idx += 1;
});
let block_succ_end = block_succs.len();
block_succ_range[block] = (block_succ_start, block_succ_end);
@@ -262,13 +278,14 @@ impl BlockLoweringOrder {
// At an orig block; successors are always edge blocks,
// possibly with orig blocks following.
let range = block_succ_range[block];
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
for &(edge_inst, succ_idx, succ) in &block_succs[range.0..range.1] {
if block_in_count[succ] == 1 {
ret.push((
edge_inst,
LoweredBlock::EdgeAndOrig {
pred: block,
edge_inst,
succ_idx,
block: succ,
},
));
@@ -278,6 +295,7 @@ impl BlockLoweringOrder {
LoweredBlock::Edge {
pred: block,
edge_inst,
succ_idx,
succ,
},
));
@@ -298,12 +316,13 @@ impl BlockLoweringOrder {
// implicit return succ).
if range.1 - range.0 > 0 {
debug_assert!(range.1 - range.0 == 1);
let (succ_edge_inst, succ_succ) = block_succs[range.0];
let (succ_edge_inst, succ_succ_idx, succ_succ) = block_succs[range.0];
ret.push((
edge_inst,
LoweredBlock::OrigAndEdge {
block: succ,
edge_inst: succ_edge_inst,
succ_idx: succ_succ_idx,
succ: succ_succ,
},
));
@@ -395,7 +414,7 @@ impl BlockLoweringOrder {
let mut lowered_succ_ranges = vec![];
let mut lb_to_bindex = FxHashMap::default();
for (block, succ_range) in rpo.into_iter() {
let index = lowered_order.len() as BlockIndex;
let index = BlockIndex::new(lowered_order.len());
lb_to_bindex.insert(block, index);
lowered_order.push(block);
lowered_succ_ranges.push(succ_range);
@@ -416,7 +435,7 @@ impl BlockLoweringOrder {
let mut orig_map = SecondaryMap::with_default(None);
for (i, lb) in lowered_order.iter().enumerate() {
let i = i as BlockIndex;
let i = BlockIndex::new(i);
if let Some(b) = lb.orig_block() {
orig_map[b] = Some(i);
}
@@ -441,7 +460,7 @@ impl BlockLoweringOrder {
/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
let range = self.lowered_succ_ranges[block as usize];
let range = self.lowered_succ_ranges[block.index()];
&self.lowered_succ_indices[range.0..range.1]
}


@@ -269,7 +269,7 @@ impl MachLabel {
/// Get a label for a block. (The first N MachLabels are always reserved for
/// the N blocks in the vcode.)
pub fn from_block(bindex: BlockIndex) -> MachLabel {
MachLabel(bindex)
MachLabel(bindex.index() as u32)
}
/// Get the numeric label index.
@@ -334,7 +334,7 @@ impl<I: VCodeInst> MachBuffer<I> {
/// times, e.g. after calling `add_{cond,uncond}_branch()` and
/// before emitting branch bytes.
fn check_label_branch_invariants(&self) {
if !cfg!(debug_assertions) || cfg!(fuzzing) {
if !cfg!(fuzzing) {
return;
}
let cur_off = self.cur_offset();
@@ -489,12 +489,11 @@ impl<I: VCodeInst> MachBuffer<I> {
}
/// Reserve the first N MachLabels for blocks.
pub fn reserve_labels_for_blocks(&mut self, blocks: BlockIndex) {
pub fn reserve_labels_for_blocks(&mut self, blocks: usize) {
trace!("MachBuffer: first {} labels are for blocks", blocks);
debug_assert!(self.label_offsets.is_empty());
self.label_offsets
.resize(blocks as usize, UNKNOWN_LABEL_OFFSET);
self.label_aliases.resize(blocks as usize, UNKNOWN_LABEL);
self.label_offsets.resize(blocks, UNKNOWN_LABEL_OFFSET);
self.label_aliases.resize(blocks, UNKNOWN_LABEL);
// Post-invariant: as for `get_label()`.
}
@@ -1599,14 +1598,14 @@ impl MachBranch {
/// resolving labels internally in the buffer.
pub struct MachTextSectionBuilder<I: VCodeInst> {
buf: MachBuffer<I>,
next_func: u32,
next_func: usize,
force_veneers: bool,
}
impl<I: VCodeInst> MachTextSectionBuilder<I> {
pub fn new(num_funcs: u32) -> MachTextSectionBuilder<I> {
let mut buf = MachBuffer::new();
buf.reserve_labels_for_blocks(num_funcs);
buf.reserve_labels_for_blocks(num_funcs as usize);
MachTextSectionBuilder {
buf,
next_func: 0,
@@ -1627,7 +1626,8 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
self.buf.align_to(align.unwrap_or(I::LabelUse::ALIGN));
let pos = self.buf.cur_offset();
if named {
self.buf.bind_label(MachLabel::from_block(self.next_func));
self.buf
.bind_label(MachLabel::from_block(BlockIndex::new(self.next_func)));
self.next_func += 1;
}
self.buf.put_data(func);
@@ -1635,7 +1635,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
}
fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: u32) -> bool {
let label = MachLabel::from_block(target);
let label = MachLabel::from_block(BlockIndex::new(target as usize));
let offset = u32::try_from(offset).unwrap();
match I::LabelUse::from_reloc(reloc, addend) {
Some(label_use) => {
@@ -1652,7 +1652,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
fn finish(&mut self) -> Vec<u8> {
// Double-check all functions were pushed.
assert_eq!(self.next_func, self.buf.label_offsets.len() as u32);
assert_eq!(self.next_func, self.buf.label_offsets.len());
// Finish up any veneers, if necessary.
self.buf
@@ -1675,7 +1675,7 @@ mod test {
use std::vec::Vec;
fn label(n: u32) -> MachLabel {
MachLabel::from_block(n)
MachLabel::from_block(BlockIndex::new(n as usize))
}
fn target(n: u32) -> BranchTarget {
BranchTarget::Label(label(n))
@@ -1690,7 +1690,7 @@ mod test {
buf.reserve_labels_for_blocks(2);
buf.bind_label(label(0));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let buf = buf.finish();
assert_eq!(0, buf.total_size());
@@ -1710,15 +1710,15 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
@@ -1740,17 +1740,17 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Udf {
trap_code: TrapCode::Interrupt,
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
@@ -1762,9 +1762,9 @@ mod test {
kind: CondBrKind::NotZero(xreg(0)),
trap_code: TrapCode::Interrupt,
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let inst = Inst::Nop4;
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let buf2 = buf2.finish();
@@ -1785,7 +1785,7 @@ mod test {
taken: target(2),
not_taken: target(3),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
while buf.cur_offset() < 2000000 {
@@ -1793,16 +1793,16 @@ mod test {
buf.emit_island(0);
}
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
}
buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();
@@ -1831,7 +1831,7 @@ mod test {
// go directly to the target.
not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let buf2 = buf2.finish();
@@ -1848,16 +1848,16 @@ mod test {
buf.bind_label(label(0));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
while buf.cur_offset() < 2000000 {
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
}
buf.bind_label(label(3));
@@ -1866,7 +1866,7 @@ mod test {
taken: target(0),
not_taken: target(1),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();
@@ -1879,11 +1879,11 @@ mod test {
taken: BranchTarget::ResolvedOffset(8),
not_taken: BranchTarget::ResolvedOffset(4 - (2000000 + 4)),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(-(2000000 + 8)),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let buf2 = buf2.finish();
@@ -1937,38 +1937,38 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let inst = Inst::Jump { dest: target(0) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
let inst = Inst::Jump { dest: target(4) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(4));
let inst = Inst::Jump { dest: target(5) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(5));
let inst = Inst::Jump { dest: target(7) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(6));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(7));
let inst = Inst::Ret;
inst.emit(&mut buf, &info, &mut state);
let inst = Inst::Ret { rets: vec![] };
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();
@@ -2009,23 +2009,23 @@ mod test {
buf.bind_label(label(0));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(2) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
let inst = Inst::Jump { dest: target(4) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(4));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();


@@ -2,12 +2,11 @@
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::log::DeferredDisplay;
use crate::machinst::*;
use crate::settings;
use crate::timing;
use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
use regalloc2::RegallocOptions;
use regalloc2::{self, MachineEnv};
/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
@@ -15,100 +14,38 @@ pub fn compile<B: LowerBackend + TargetIsa>(
f: &Function,
b: &B,
abi: Box<dyn ABICallee<I = B::MInst>>,
reg_universe: &RealRegUniverse,
machine_env: &MachineEnv,
emit_info: <B::MInst as MachInstEmit>::Info,
) -> CodegenResult<VCode<B::MInst>>
where
B::MInst: PrettyPrint,
{
) -> CodegenResult<(VCode<B::MInst>, regalloc2::Output)> {
// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);
// Build the lowering context.
let lower = Lower::new(f, abi, emit_info, block_order)?;
// Lower the IR.
let (mut vcode, stack_map_request_info) = {
let vcode = {
let _tt = timing::vcode_lower();
lower.lower(b)?
};
// Creating the vcode string representation may be costly for large functions, so defer its
// rendering.
log::trace!(
"vcode from lowering: \n{}",
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
);
log::trace!("vcode from lowering: \n{:?}", vcode);
// Perform register allocation.
let (run_checker, algorithm) = match vcode.flags().regalloc() {
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
settings::Regalloc::BacktrackingChecked => {
(true, Algorithm::Backtracking(Default::default()))
}
settings::Regalloc::ExperimentalLinearScan => {
(false, Algorithm::LinearScan(Default::default()))
}
settings::Regalloc::ExperimentalLinearScanChecked => {
(true, Algorithm::LinearScan(Default::default()))
}
};
#[cfg(feature = "regalloc-snapshot")]
{
use std::fs;
use std::path::Path;
if let Some(path) = std::env::var("SERIALIZE_REGALLOC").ok() {
let snapshot = regalloc::IRSnapshot::from_function(&vcode, reg_universe);
let serialized = bincode::serialize(&snapshot).expect("couldn't serialize snapshot");
let file_path = Path::new(&path).join(Path::new(&format!("ir{}.bin", f.name)));
fs::write(file_path, &serialized).expect("couldn't write IR snapshot file");
}
}
// If either there are no reference-typed values, or else there are
// but there are no safepoints at which we need to know about them,
// then we don't need stack maps.
let sri = if stack_map_request_info.reftyped_vregs.len() > 0
&& stack_map_request_info.safepoint_insns.len() > 0
{
Some(&stack_map_request_info)
} else {
None
};
let result = {
let regalloc_result = {
let _tt = timing::regalloc();
allocate_registers_with_opts(
&mut vcode,
reg_universe,
sri,
Options {
run_checker,
algorithm,
},
)
.map_err(|err| {
log::error!(
"Register allocation error for vcode\n{}\nError: {:?}",
vcode.show_rru(Some(reg_universe)),
let mut options = RegallocOptions::default();
options.verbose_log = log::log_enabled!(log::Level::Trace);
regalloc2::run(&vcode, machine_env, &options)
.map_err(|err| {
log::error!(
"Register allocation error for vcode\n{:?}\nError: {:?}\nCLIF for error:\n{:?}",
vcode,
err,
f,
);
err
);
err
})
.expect("register allocation")
})
.expect("register allocation")
};
// Reorder vcode into final order and copy out final instruction sequence
// all at once. This also inserts prologues/epilogues.
{
let _tt = timing::vcode_post_ra();
vcode.replace_insns_from_regalloc(result);
}
log::trace!(
"vcode after regalloc: final version:\n{}",
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
);
Ok(vcode)
Ok((vcode, regalloc_result))
}


@@ -1,525 +0,0 @@
//! Debug info analysis: computes value-label ranges from value-label markers in
//! generated VCode.
//!
//! We "reverse-engineer" debug info like this because it is far more reliable
//! than generating it while emitting code and keeping it in sync.
//!
//! This works by (i) observing "value-label marker" instructions, which are
//! semantically just an assignment from a register to a "value label" (which
//! one can think of as another register; they represent, e.g., Wasm locals) at
//! a certain point in the code, and (ii) observing loads and stores to the
//! stack and register moves.
//!
//! We track, at every program point, the correspondence between each value
//! label and *all* locations in which it resides. E.g., if it is stored to the
//! stack, we remember that it is in both a register and the stack slot; but if
//! the register is later overwritten, then we have it just in the stack slot.
//! This allows us to avoid false positives when observing loads/stores that we think
//! are spillslots but really aren't.
//!
//! We do a standard forward dataflow analysis to compute this info.
use crate::ir::ValueLabel;
use crate::machinst::*;
use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange};
use log::trace;
use regalloc::{Reg, RegUsageCollector};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
/// Location of a labeled value: in a register or in a stack slot. Note that a
/// value may live in more than one location; `AnalysisInfo` maps each
/// value-label to multiple `ValueLoc`s.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum ValueLoc {
Reg(Reg),
/// Nominal-SP offset.
Stack(i64),
}
impl From<ValueLoc> for LabelValueLoc {
fn from(v: ValueLoc) -> Self {
match v {
ValueLoc::Reg(r) => LabelValueLoc::Reg(r),
ValueLoc::Stack(off) => LabelValueLoc::SPOffset(off),
}
}
}
impl ValueLoc {
fn is_reg(self) -> bool {
match self {
ValueLoc::Reg(_) => true,
_ => false,
}
}
fn is_stack(self) -> bool {
match self {
ValueLoc::Stack(_) => true,
_ => false,
}
}
}
/// Mappings at one program point.
#[derive(Clone, Debug)]
struct AnalysisInfo {
/// Nominal SP relative to real SP. If `None`, then the offset is
/// indeterminate (i.e., we merged to the lattice 'bottom' element). This
/// should not happen in well-formed code.
nominal_sp_offset: Option<i64>,
/// Forward map from labeled values to sets of locations.
label_to_locs: HashMap<ValueLabel, HashSet<ValueLoc>>,
/// Reverse map for each register indicating the value it holds, if any.
reg_to_label: HashMap<Reg, ValueLabel>,
/// Reverse map for each stack offset indicating the value it holds, if any.
stack_to_label: HashMap<i64, ValueLabel>,
}
/// Get the registers written (mod'd or def'd) by a machine instruction.
fn get_inst_writes<M: MachInst>(m: &M) -> Vec<Reg> {
// TODO: expose this part of regalloc.rs's interface publicly.
let mut vecs = RegUsageCollector::get_empty_reg_vecs_test_framework_only(false);
let mut coll = RegUsageCollector::new(&mut vecs);
m.get_regs(&mut coll);
vecs.defs.extend(vecs.mods.into_iter());
vecs.defs
}
impl AnalysisInfo {
/// Create a new analysis state. This is the "top" lattice element at which
/// the fixpoint dataflow analysis starts.
fn new() -> Self {
AnalysisInfo {
nominal_sp_offset: Some(0),
label_to_locs: HashMap::new(),
reg_to_label: HashMap::new(),
stack_to_label: HashMap::new(),
}
}
/// Remove all locations for a given labeled value. Used when the labeled
/// value is redefined (so old values become stale).
fn clear_label(&mut self, label: ValueLabel) {
if let Some(locs) = self.label_to_locs.remove(&label) {
for loc in locs {
match loc {
ValueLoc::Reg(r) => {
self.reg_to_label.remove(&r);
}
ValueLoc::Stack(off) => {
self.stack_to_label.remove(&off);
}
}
}
}
}
/// Remove a label from a register, if any. Used, e.g., if the register is
/// overwritten.
fn clear_reg(&mut self, reg: Reg) {
if let Some(label) = self.reg_to_label.remove(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Reg(reg));
}
}
}
/// Remove a label from a stack offset, if any. Used, e.g., when the stack
/// slot is overwritten.
fn clear_stack_off(&mut self, off: i64) {
if let Some(label) = self.stack_to_label.remove(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Stack(off));
}
}
}
/// Indicate that a labeled value is newly defined and its new value is in
/// `reg`.
fn def_label_at_reg(&mut self, label: ValueLabel, reg: Reg) {
self.clear_label(label);
self.label_to_locs
.entry(label)
.or_insert_with(|| HashSet::new())
.insert(ValueLoc::Reg(reg));
self.reg_to_label.insert(reg, label);
}
/// Process a store from a register to a stack slot (offset).
fn store_reg(&mut self, reg: Reg, off: i64) {
self.clear_stack_off(off);
if let Some(label) = self.reg_to_label.get(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(label) {
locs.insert(ValueLoc::Stack(off));
}
self.stack_to_label.insert(off, *label);
}
}
/// Process a load from a stack slot (offset) to a register.
fn load_reg(&mut self, reg: Reg, off: i64) {
self.clear_reg(reg);
if let Some(&label) = self.stack_to_label.get(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(reg));
}
self.reg_to_label.insert(reg, label);
}
}
/// Process a move from one register to another.
fn move_reg(&mut self, to: Reg, from: Reg) {
self.clear_reg(to);
if let Some(&label) = self.reg_to_label.get(&from) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(to));
}
self.reg_to_label.insert(to, label);
}
}
/// Update the analysis state w.r.t. an instruction's effects. Given the
/// state just before `inst`, this method updates `self` to be the state
/// just after `inst`.
fn step<M: MachInst>(&mut self, inst: &M) {
for write in get_inst_writes(inst) {
self.clear_reg(write);
}
if let Some((label, reg)) = inst.defines_value_label() {
self.def_label_at_reg(label, reg);
}
match inst.stack_op_info() {
Some(MachInstStackOpInfo::LoadNomSPOff(reg, offset)) => {
self.load_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::StoreNomSPOff(reg, offset)) => {
self.store_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::NomSPAdj(offset)) => {
if self.nominal_sp_offset.is_some() {
self.nominal_sp_offset = Some(self.nominal_sp_offset.unwrap() + offset);
}
}
_ => {}
}
if let Some((to, from)) = inst.is_move() {
let to = to.to_reg();
self.move_reg(to, from);
}
}
}
/// Trait used to implement the dataflow analysis' meet (intersect) function
/// on the `AnalysisInfo` components. For efficiency, this is implemented as a
/// mutation on the LHS, rather than a pure functional operation.
trait IntersectFrom {
fn intersect_from(&mut self, other: &Self) -> IntersectResult;
}
/// Result of an intersection operation. Indicates whether the mutated LHS
/// (which becomes the intersection result) differs from the original LHS. Also
/// indicates if the value has become "empty" and should be removed from a
/// parent container, if any.
struct IntersectResult {
/// Did the intersection change the LHS input (the one that was mutated into
/// the result)? This is needed to drive the fixpoint loop; when no more
/// changes occur, we have converged.
changed: bool,
/// Is the resulting value "empty"? This can be used when a container, such
/// as a map, holds values of this (intersection result) type; when
/// `is_empty` is true for the merge of the values at a particular key, we
/// can remove that key from the merged (intersected) result. This is not
/// necessary for analysis correctness but reduces the memory and runtime
/// cost of the fixpoint loop.
is_empty: bool,
}
impl IntersectFrom for AnalysisInfo {
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
changed |= self
.nominal_sp_offset
.intersect_from(&other.nominal_sp_offset)
.changed;
changed |= self
.label_to_locs
.intersect_from(&other.label_to_locs)
.changed;
changed |= self
.reg_to_label
.intersect_from(&other.reg_to_label)
.changed;
changed |= self
.stack_to_label
.intersect_from(&other.stack_to_label)
.changed;
IntersectResult {
changed,
is_empty: false,
}
}
}
impl<K, V> IntersectFrom for HashMap<K, V>
where
K: Copy + Eq + Hash,
V: IntersectFrom,
{
/// Intersection for hashmap: remove keys that are not in both inputs;
/// recursively intersect values for keys in common.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
let mut remove_keys = vec![];
for k in self.keys() {
if !other.contains_key(k) {
remove_keys.push(*k);
}
}
for k in &remove_keys {
changed = true;
self.remove(k);
}
remove_keys.clear();
for k in other.keys() {
if let Some(v) = self.get_mut(k) {
let result = v.intersect_from(other.get(k).unwrap());
changed |= result.changed;
if result.is_empty {
remove_keys.push(*k);
}
}
}
for k in &remove_keys {
changed = true;
self.remove(k);
}
IntersectResult {
changed,
is_empty: self.len() == 0,
}
}
}
impl<T> IntersectFrom for HashSet<T>
where
T: Copy + Eq + Hash,
{
/// Intersection for hashset: just take the set intersection.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
let mut remove = vec![];
for val in self.iter() {
if !other.contains(val) {
remove.push(*val);
}
}
for val in remove {
changed = true;
self.remove(&val);
}
IntersectResult {
changed,
is_empty: self.len() == 0,
}
}
}
impl IntersectFrom for ValueLabel {
// Intersection for labeled value: remove if not equal. This is equivalent
// to a three-level lattice with top, bottom, and unordered set of
// individual labels in between.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
IntersectResult {
changed: false,
is_empty: *self != *other,
}
}
}
impl<T> IntersectFrom for Option<T>
where
T: Copy + Eq,
{
/// Intersection for Option<T>: recursively intersect if both `Some`, else
/// `None`.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
if !(self.is_some() && other.is_some() && self == other) {
changed = true;
*self = None;
}
IntersectResult {
changed,
is_empty: self.is_none(),
}
}
}
/// Compute the value-label ranges (locations for program-point ranges for
/// labeled values) from a given `VCode` compilation result.
///
/// In order to compute this information, we perform a dataflow analysis on the
/// machine code. To do so, and translate the results into a form usable by the
/// debug-info consumers, we need to know two additional things:
///
/// - The machine-code layout (code offsets) of the instructions. DWARF is
/// encoded in terms of instruction *ends* (and we reason about value
/// locations at program points *after* instructions, to match this), so we
/// take an array `inst_ends`, giving us code offsets for each instruction's
/// end-point. (Note that this is one *past* the last byte; so a 4-byte
/// instruction at offset 0 has an end offset of 4.)
///
/// - The locations of the labels to which branches will jump. Branches can tell
/// us about their targets in terms of `MachLabel`s, but we don't know where
/// those `MachLabel`s will be placed in the linear array of instructions. We
/// take the array `label_insn_index` to provide this info: for a label with
/// index `l`, `label_insn_index[l]` is the index of the instruction before
/// which that label is bound.
pub(crate) fn compute<I: VCodeInst>(
insts: &[I],
layout_info: &InstsLayoutInfo,
) -> ValueLabelsRanges {
let inst_start = |idx: usize| {
if idx == 0 {
0
} else {
layout_info.inst_end_offsets[idx - 1]
}
};
trace!("compute: insts =");
for i in 0..insts.len() {
trace!(
" #{} end: {} -> {:?}",
i,
layout_info.inst_end_offsets[i],
insts[i]
);
}
trace!("label_insn_index: {:?}", layout_info.label_inst_indices);
// Info at each block head, indexed by label.
let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();
// Initialize state at entry.
block_starts.insert(0, AnalysisInfo::new());
// Worklist: label indices for basic blocks.
let mut worklist = Vec::new();
let mut worklist_set = HashSet::new();
worklist.push(0);
worklist_set.insert(0);
while !worklist.is_empty() {
let block = worklist.pop().unwrap();
worklist_set.remove(&block);
let mut state = block_starts.get(&block).unwrap().clone();
trace!("at block {} -> state: {:?}", block, state);
// Iterate for each instruction in the block (we break at the first
// terminator we see).
let mut index = layout_info.label_inst_indices[block as usize];
while index < insts.len() as u32 {
state.step(&insts[index as usize]);
trace!(" -> inst #{}: {:?}", index, insts[index as usize]);
trace!(" --> state: {:?}", state);
let term = insts[index as usize].is_term();
if term.is_term() {
for succ in term.get_succs() {
trace!(" SUCCESSOR block {}", succ.get());
if let Some(succ_state) = block_starts.get_mut(&succ.get()) {
trace!(" orig state: {:?}", succ_state);
if succ_state.intersect_from(&state).changed {
if worklist_set.insert(succ.get()) {
worklist.push(succ.get());
}
trace!(" (changed)");
}
trace!(" new state: {:?}", succ_state);
} else {
// First time seeing this block
block_starts.insert(succ.get(), state.clone());
worklist.push(succ.get());
worklist_set.insert(succ.get());
}
}
break;
}
index += 1;
}
}
// Now iterate over blocks one last time, collecting
// value-label locations.
let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
for block in 0..layout_info.label_inst_indices.len() {
let start_index = layout_info.label_inst_indices[block];
let end_index = if block == layout_info.label_inst_indices.len() - 1 {
insts.len() as u32
} else {
layout_info.label_inst_indices[block + 1]
};
let block = block as u32;
let mut state = block_starts.get(&block).unwrap().clone();
for index in start_index..end_index {
let offset = inst_start(index as usize);
let end = layout_info.inst_end_offsets[index as usize];
// Cold blocks cause instructions to occur out-of-order wrt
// others. We rely on the monotonic mapping from instruction
// index to offset in machine code for this analysis to work,
// so we just skip debuginfo for cold blocks. This should be
// generally fine, as cold blocks generally constitute
// slowpaths for expansions of particular ops, rather than
// user-written code.
if layout_info.start_of_cold_code.is_some()
&& offset >= layout_info.start_of_cold_code.unwrap()
{
continue;
}
assert!(offset <= end);
state.step(&insts[index as usize]);
for (label, locs) in &state.label_to_locs {
trace!(" inst {} has label {:?} -> locs {:?}", index, label, locs);
// Find an appropriate loc: a register if possible, otherwise pick the first stack
// loc.
let reg = locs.iter().cloned().find(|l| l.is_reg());
let loc = reg.or_else(|| locs.iter().cloned().find(|l| l.is_stack()));
if let Some(loc) = loc {
let loc = LabelValueLoc::from(loc);
let list = value_labels_ranges.entry(*label).or_insert_with(|| vec![]);
// If the existing location list for this value-label is
// either empty, or has an end location that does not extend
// to the current offset, then we have to append a new
// entry. Otherwise, we can extend the current entry.
//
// Note that `end` is one past the end of the instruction;
// it appears that `end` is exclusive, so a mapping valid at
// offset 5 will have start = 5, end = 6.
if list
.last()
.map(|last| last.end <= offset || last.loc != loc)
.unwrap_or(true)
{
list.push(ValueLocRange {
loc,
start: end,
end: end + 1,
});
} else {
list.last_mut().unwrap().end = end + 1;
}
}
}
}
}
trace!("ret: {:?}", value_labels_ranges);
value_labels_ranges
}
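(Aside on the removed module above: the meet operation it performs is plain
map/set intersection, applied recursively per value label. A standalone sketch
of that idea with ordinary `HashMap`s, not the module's `IntersectFrom` trait;
the key/value types here are illustrative only:)

```rust
use std::collections::{HashMap, HashSet};

// Standalone sketch of the dataflow meet used by the removed analysis: keep
// only keys present in both inputs, and intersect the location sets of keys
// that survive, dropping any that become empty (mirroring `is_empty` above).
fn meet(
    lhs: &HashMap<u32, HashSet<i64>>,
    rhs: &HashMap<u32, HashSet<i64>>,
) -> HashMap<u32, HashSet<i64>> {
    let mut out = HashMap::new();
    for (k, lv) in lhs {
        if let Some(rv) = rhs.get(k) {
            let common: HashSet<i64> = lv.intersection(rv).cloned().collect();
            if !common.is_empty() {
                out.insert(*k, common);
            }
        }
    }
    out
}
```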


@@ -1,8 +1,8 @@
//! Miscellaneous helpers for machine backends.
use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
use super::{Reg, Writable};
use crate::ir::Type;
use regalloc::{Reg, Writable};
use std::ops::{Add, BitAnd, Not, Sub};
/// Returns the size (in bits) of a given type.


@@ -1,8 +1,7 @@
use crate::ir::{types, Inst, Value, ValueList};
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer};
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, Reg, Writable};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{Reg, Writable};
use smallvec::SmallVec;
use std::cell::Cell;
@@ -107,7 +106,8 @@ macro_rules! isle_prelude_methods {
#[inline]
fn invalid_reg(&mut self) -> Reg {
Reg::invalid()
use crate::machinst::valueregs::InvalidSentinel;
Reg::invalid_sentinel()
}
#[inline]
@@ -467,7 +467,6 @@ where
pub lower_ctx: &'a mut C,
pub flags: &'a F,
pub isa_flags: &'a I,
pub emitted_insts: SmallVec<[(C::I, bool); N]>,
}
/// Shared lowering code amongst all backends for doing ISLE-based lowering.
@@ -482,7 +481,6 @@ pub(crate) fn lower_common<C, F, I, IF, const N: usize>(
outputs: &[InsnOutput],
inst: Inst,
isle_lower: IF,
map_regs: fn(&mut C::I, &RegRenamer),
) -> Result<(), ()>
where
C: LowerCtx,
@@ -495,7 +493,6 @@ where
lower_ctx,
flags,
isa_flags,
emitted_insts: SmallVec::new(),
};
let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?;
@@ -514,10 +511,15 @@ where
}
// The ISLE generated code emits its own registers to define the
// instruction's lowered values in. We rename those registers to the
// registers they were assigned when their value was used as an operand in
// earlier lowerings.
let mut renamer = RegRenamer::default();
// instruction's lowered values in. However, other instructions
// that use this SSA value will be lowered assuming that the value
// is generated into a pre-assigned, different, register.
//
// To connect the two, we set up "aliases" in the VCodeBuilder
// that apply when it is building the Operand table for the
// regalloc to use. These aliases effectively rewrite any use of
// the pre-assigned register to the register that was returned by
// the ISLE lowering logic.
for i in 0..outputs.len() {
let regs = temp_regs[i];
let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]);
@@ -528,41 +530,11 @@ where
// Flags values do not occupy any registers.
assert!(regs.len() == 0);
} else {
let (_, tys) = <C::I>::rc_for_type(ty).unwrap();
assert!(regs.len() == tys.len());
assert!(regs.len() == dsts.len());
for ((dst, temp), ty) in dsts.regs().iter().zip(regs.regs().iter()).zip(tys) {
renamer.add_rename(*temp, dst.to_reg(), *ty);
for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp);
}
}
}
for (inst, _) in isle_ctx.emitted_insts.iter_mut() {
map_regs(inst, &renamer);
}
// If any renamed register wasn't actually defined in the ISLE-generated
// instructions then what we're actually doing is "renaming" an input to a
// new name which requires manually inserting a `mov` instruction. Note that
// this typically doesn't happen and is only here for cases where the input
// is sometimes passed through unmodified to the output, such as
// zero-extending a 64-bit input to a 128-bit output which doesn't actually
// change the input and simply produces another zero'd register.
for (old, new, ty) in renamer.unmapped_defs() {
isle_ctx
.lower_ctx
.emit(<C::I>::gen_move(Writable::from_reg(new), old, ty));
}
// Once everything is remapped we forward all emitted instructions to the
// `lower_ctx`. Note that this happens after the synthetic mov's above in
// case any of these instruction use those movs.
for (inst, is_safepoint) in isle_ctx.emitted_insts {
if is_safepoint {
lower_ctx.emit_safepoint(inst);
} else {
lower_ctx.emit(inst);
}
}
Ok(())
}
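The vreg-alias scheme described in the comment above replaces the old
`RegRenamer` post-pass. A minimal sketch of the connection step, assuming a
`LowerCtx` with the `set_vreg_alias` method added in this PR; the helper name
`alias_isle_outputs` is illustrative only:

```rust
use crate::machinst::{LowerCtx, Reg, Writable};

// Illustrative helper (not in the PR): connect ISLE-produced temporaries to
// the pre-assigned output vregs by recording vreg aliases, so that the
// operand table handed to regalloc2 rewrites uses of the pre-assigned vreg
// into the ISLE temp.
fn alias_isle_outputs<C: LowerCtx>(ctx: &mut C, dsts: &[Writable<Reg>], temps: &[Reg]) {
    assert_eq!(dsts.len(), temps.len());
    for (dst, &temp) in dsts.iter().zip(temps.iter()) {
        // `set_vreg_alias(from, to)`: treat `from` as an alias of `to`.
        ctx.set_vreg_alias(dst.to_reg(), temp);
    }
}
```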


@@ -18,17 +18,19 @@ use crate::ir::{
};
use crate::machinst::{
non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
LoweredBlock, MachLabel, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
VCodeInst, ValueRegs,
LoweredBlock, MachLabel, Reg, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData,
VCodeConstants, VCodeInst, ValueRegs, Writable,
};
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::convert::TryInto;
use regalloc::{Reg, StackmapRequestInfo, Writable};
use regalloc2::VReg;
use smallvec::{smallvec, SmallVec};
use std::fmt::Debug;
use super::{first_user_vreg_index, VCodeBuildDirection};
/// An "instruction color" partitions CLIF instructions by side-effecting ops.
/// All instructions with the same "color" are guaranteed not to be separated by
/// any side-effecting op (for this purpose, loads are also considered
@@ -160,8 +162,6 @@ pub trait LowerCtx {
fn alloc_tmp(&mut self, ty: Type) -> ValueRegs<Writable<Reg>>;
/// Emit a machine instruction.
fn emit(&mut self, mach_inst: Self::I);
/// Emit a machine instruction that is a safepoint.
fn emit_safepoint(&mut self, mach_inst: Self::I);
/// Indicate that the side-effect of an instruction has been sunk to the
/// current scan location. This should only be done when the instruction's
/// original results are not used (i.e., `put_input_in_regs` is not invoked
@@ -178,6 +178,9 @@ pub trait LowerCtx {
/// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
/// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
/// Note that one vreg is to be treated as an alias of another.
fn set_vreg_alias(&mut self, from: Reg, to: Reg);
}
/// A representation of all of the ways in which a value is available, aside
@@ -232,14 +235,6 @@ pub trait LowerBackend {
}
}
/// A pending instruction to insert and auxiliary information about it: its source location and
/// whether it is a safepoint.
struct InstTuple<I: VCodeInst> {
loc: SourceLoc,
is_safepoint: bool,
inst: I,
}
/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {
@@ -287,20 +282,10 @@ pub struct Lower<'func, I: VCodeInst> {
inst_sunk: FxHashSet<Inst>,
/// Next virtual register number to allocate.
next_vreg: u32,
/// Insts in reverse block order, before final copy to vcode.
block_insts: Vec<InstTuple<I>>,
/// Ranges in `block_insts` constituting BBs.
block_ranges: Vec<(usize, usize)>,
/// Instructions collected for the BB in progress, in reverse order, with
/// source-locs attached.
bb_insts: Vec<InstTuple<I>>,
next_vreg: usize,
/// Instructions collected for the CLIF inst in progress, in forward order.
ir_insts: Vec<InstTuple<I>>,
ir_insts: Vec<I>,
/// The register to use for GetPinnedReg, if any, on this architecture.
pinned_reg: Option<Reg>,
@@ -324,22 +309,22 @@ pub enum RelocDistance {
fn alloc_vregs<I: VCodeInst>(
ty: Type,
next_vreg: &mut u32,
next_vreg: &mut usize,
vcode: &mut VCodeBuilder<I>,
) -> CodegenResult<ValueRegs<Reg>> {
let v = *next_vreg;
let (regclasses, tys) = I::rc_for_type(ty)?;
*next_vreg += regclasses.len() as u32;
let regs = match regclasses {
&[rc0] => ValueRegs::one(Reg::new_virtual(rc0, v)),
&[rc0, rc1] => ValueRegs::two(Reg::new_virtual(rc0, v), Reg::new_virtual(rc1, v + 1)),
*next_vreg += regclasses.len();
let regs: ValueRegs<Reg> = match regclasses {
&[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
&[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
// We can extend this if/when we support 32-bit targets; e.g.,
// an i128 on a 32-bit machine will need up to four machine regs
// for a `Value`.
_ => panic!("Value must reside in 1 or 2 registers"),
};
for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
vcode.set_vreg_type(reg.to_virtual_reg(), reg_ty);
vcode.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
}
Ok(regs)
}
@@ -358,9 +343,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
block_order: BlockLoweringOrder,
) -> CodegenResult<Lower<'func, I>> {
let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
let mut vcode = VCodeBuilder::new(abi, emit_info, block_order, constants);
let mut vcode = VCodeBuilder::new(
abi,
emit_info,
block_order,
constants,
VCodeBuildDirection::Backward,
);
let mut next_vreg: u32 = 0;
let mut next_vreg: usize = first_user_vreg_index();
let mut value_regs = SecondaryMap::with_default(ValueRegs::invalid());
@@ -381,10 +372,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?;
value_regs[result] = regs;
log::trace!(
"bb {} inst {} ({:?}): result regs {:?}",
"bb {} inst {} ({:?}): result {} regs {:?}",
bb,
inst,
f.dfg[inst],
result,
regs,
);
}
@@ -459,9 +451,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
inst_sunk: FxHashSet::default(),
cur_scan_entry_color: None,
cur_inst: None,
block_insts: vec![],
block_ranges: vec![],
bb_insts: vec![],
ir_insts: vec![],
pinned_reg: None,
vm_context,
@@ -475,6 +464,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
entry_bb,
self.f.dfg.block_params(entry_bb)
);
// Make the vmctx available in debuginfo.
if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
self.emit_value_label_marks_for_value(vmctx_val);
}
for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
if !self.vcode.abi().arg_is_needed_in_body(i) {
continue;
@@ -509,14 +504,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
// Hack: to keep `vmctx` alive, if it exists, we emit a value label here
// for it if debug info is requested. This ensures that it exists either
// in a register or spillslot throughout the entire function body, and
// allows for a better debugging experience.
if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
self.emit_value_label_marks_for_value(vmctx_val);
}
let retval_regs = self.retval_regs.clone();
for (i, regs) in retval_regs.into_iter().enumerate() {
let regs = writable_value_regs(regs);
@@ -534,141 +521,16 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
};
self.emit(inst);
}
fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> {
log::trace!("lower_edge: pred {} succ {}", pred, succ);
let num_args = self.f.dfg.block_params(succ).len();
debug_assert!(num_args == self.f.dfg.inst_variable_args(inst).len());
// Most blocks have no params, so skip all the hoop-jumping below and make an early exit.
if num_args == 0 {
return Ok(());
}
self.cur_inst = Some(inst);
// Make up two vectors of info:
//
// * one for dsts which are to be assigned constants. We'll deal with those second, so
// as to minimise live ranges.
//
// * one for dsts whose sources are non-constants.
let mut const_bundles: SmallVec<[_; 16]> = SmallVec::new();
let mut var_bundles: SmallVec<[_; 16]> = SmallVec::new();
let mut i = 0;
for (dst_val, src_val) in self
.f
.dfg
.block_params(succ)
.iter()
.zip(self.f.dfg.inst_variable_args(inst).iter())
{
let src_val = self.f.dfg.resolve_aliases(*src_val);
let ty = self.f.dfg.value_type(src_val);
debug_assert!(ty == self.f.dfg.value_type(*dst_val));
let dst_regs = self.value_regs[*dst_val];
let input = self.get_value_as_source_or_const(src_val);
log::trace!("jump arg {} is {}", i, src_val);
i += 1;
if let Some(c) = input.constant {
log::trace!(" -> constant {}", c);
const_bundles.push((ty, writable_value_regs(dst_regs), c));
} else {
let src_regs = self.put_value_in_regs(src_val);
log::trace!(" -> reg {:?}", src_regs);
// Skip self-assignments. Not only are they pointless, they falsely trigger the
// overlap-check below and hence can cause a lot of unnecessary copying through
// temporaries.
if dst_regs != src_regs {
var_bundles.push((ty, writable_value_regs(dst_regs), src_regs));
}
// Hack: generate a virtual instruction that uses vmctx in
// order to keep it alive for the duration of the function,
// for the benefit of debuginfo.
if self.f.dfg.values_labels.is_some() {
if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
let vmctx_reg = self.value_regs[vmctx_val].only_reg().unwrap();
self.emit(I::gen_dummy_use(vmctx_reg));
}
}
// Deal first with the moves whose sources are variables.
// FIXME: use regalloc.rs' SparseSetU here. This would avoid all heap allocation
// for cases of up to circa 16 args. Currently not possible because regalloc.rs
// does not export it.
let mut src_reg_set = FxHashSet::<Reg>::default();
for (_, _, src_regs) in &var_bundles {
for &reg in src_regs.regs() {
src_reg_set.insert(reg);
}
}
let mut overlaps = false;
'outer: for (_, dst_regs, _) in &var_bundles {
for &reg in dst_regs.regs() {
if src_reg_set.contains(&reg.to_reg()) {
overlaps = true;
break 'outer;
}
}
}
// If, as is mostly the case, the source and destination register sets are non
// overlapping, then we can copy directly, so as to save the register allocator work.
if !overlaps {
for (ty, dst_regs, src_regs) in &var_bundles {
let (_, reg_tys) = I::rc_for_type(*ty)?;
for ((dst, src), reg_ty) in dst_regs
.regs()
.iter()
.zip(src_regs.regs().iter())
.zip(reg_tys.iter())
{
self.emit(I::gen_move(*dst, *src, *reg_ty));
}
}
} else {
// There's some overlap, so play safe and copy via temps.
let mut tmp_regs = SmallVec::<[ValueRegs<Writable<Reg>>; 16]>::new();
for (ty, _, _) in &var_bundles {
tmp_regs.push(self.alloc_tmp(*ty));
}
for ((ty, _, src_reg), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
let (_, reg_tys) = I::rc_for_type(*ty)?;
for ((tmp, src), reg_ty) in tmp_reg
.regs()
.iter()
.zip(src_reg.regs().iter())
.zip(reg_tys.iter())
{
self.emit(I::gen_move(*tmp, *src, *reg_ty));
}
}
for ((ty, dst_reg, _), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
let (_, reg_tys) = I::rc_for_type(*ty)?;
for ((dst, tmp), reg_ty) in dst_reg
.regs()
.iter()
.zip(tmp_reg.regs().iter())
.zip(reg_tys.iter())
{
self.emit(I::gen_move(*dst, tmp.to_reg(), *reg_ty));
}
}
}
// Now, finally, deal with the moves whose sources are constants.
for (ty, dst_reg, const_val) in &const_bundles {
for inst in I::gen_constant(*dst_reg, *const_val as u128, *ty, |ty| {
self.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
self.emit(inst);
}
}
Ok(())
}
/// Has this instruction been sunk to a use-site (i.e., away from its
@@ -694,21 +556,24 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
self.cur_scan_entry_color = Some(self.block_end_colors[block]);
// Lowering loop:
// - For each non-branch instruction, in reverse order:
// - If side-effecting (load, store, branch/call/return, possible trap), or if
// used outside of this block, or if demanded by another inst, then lower.
// - If side-effecting (load, store, branch/call/return,
// possible trap), or if used outside of this block, or if
// demanded by another inst, then lower.
//
// That's it! Lowering of side-effecting ops will force all *needed*
// (live) non-side-effecting ops to be lowered at the right places, via
// the `use_input_reg()` callback on the `LowerCtx` (that's us). That's
// because `use_input_reg()` sets the eager/demand bit for any insts
// whose result registers are used.
// That's it! Lowering of side-effecting ops will force all
// *needed* (live) non-side-effecting ops to be lowered at the
// right places, via the `use_input_reg()` callback on the
// `LowerCtx` (that's us). That's because `use_input_reg()`
// sets the eager/demand bit for any insts whose result
// registers are used.
//
// We build up the BB in reverse instruction order in `bb_insts`.
// Because the machine backend calls `ctx.emit()` in forward order, we
// collect per-IR-inst lowered instructions in `ir_insts`, then reverse
// these and append to `bb_insts` as we go backward through the block.
// `bb_insts` are then reversed again and appended to the VCode at the
// end of the BB (in the toplevel driver `lower()`).
// We set the VCodeBuilder to "backward" mode, so we emit
// blocks in reverse order wrt the BlockIndex sequence, and
// emit instructions in reverse order within blocks. Because
// the machine backend calls `ctx.emit()` in forward order, we
// collect per-IR-inst lowered instructions in `ir_insts`,
// then reverse these and append to the VCode at the end of
// each IR instruction.
for inst in self.f.layout.block_insts(block).rev() {
let data = &self.f.dfg[inst];
let has_side_effect = has_lowering_side_effect(self.f, inst);
@@ -750,9 +615,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
if has_side_effect || value_needed {
log::trace!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]);
backend.lower(self, inst)?;
// Emit value-label markers if needed, to later recover debug
// mappings.
self.emit_value_label_markers_for_inst(inst);
}
if data.opcode().is_return() {
// Return: handle specially, using ABI-appropriate sequence.
@@ -767,11 +629,33 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
let loc = self.srcloc(inst);
self.finish_ir_inst(loc);
// Emit value-label markers if needed, to later recover
// debug mappings. This must happen before the instruction
// (so after we emit, in bottom-to-top pass).
self.emit_value_label_markers_for_inst(inst);
}
// Add the block params to this block.
self.add_block_params(block)?;
self.cur_scan_entry_color = None;
Ok(())
}
fn add_block_params(&mut self, block: Block) -> CodegenResult<()> {
for &param in self.f.dfg.block_params(block) {
let ty = self.f.dfg.value_type(param);
let (_reg_rcs, reg_tys) = I::rc_for_type(ty)?;
debug_assert_eq!(reg_tys.len(), self.value_regs[param].len());
for (&reg, &rty) in self.value_regs[param].regs().iter().zip(reg_tys.iter()) {
self.vcode
.add_block_param(reg.to_virtual_reg().unwrap(), rty);
}
}
Ok(())
}
fn get_value_labels<'a>(&'a self, val: Value, depth: usize) -> Option<&'a [ValueLabelStart]> {
if let Some(ref values_labels) = self.f.dfg.values_labels {
log::trace!(
@@ -794,7 +678,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
fn emit_value_label_marks_for_value(&mut self, val: Value) {
let mut markers: SmallVec<[I; 4]> = smallvec![];
let regs = self.value_regs[val];
if regs.len() > 1 {
return;
@@ -813,12 +696,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
reg,
label,
);
markers.push(I::gen_value_label_marker(label, reg));
self.vcode.add_value_label(reg, label);
}
}
for marker in markers {
self.emit(marker);
}
}
fn emit_value_label_markers_for_inst(&mut self, inst: Inst) {
@@ -849,36 +729,17 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
fn finish_ir_inst(&mut self, loc: SourceLoc) {
// `bb_insts` is kept in reverse order, so emit the instructions in
// reverse order.
for mut tuple in self.ir_insts.drain(..).rev() {
tuple.loc = loc;
self.bb_insts.push(tuple);
self.vcode.set_srcloc(loc);
// The VCodeBuilder builds in reverse order (and reverses at
// the end), but `ir_insts` is in forward order, so reverse
// it.
for inst in self.ir_insts.drain(..).rev() {
self.vcode.push(inst);
}
}
fn finish_bb(&mut self) {
let start = self.block_insts.len();
for tuple in self.bb_insts.drain(..).rev() {
self.block_insts.push(tuple);
}
let end = self.block_insts.len();
self.block_ranges.push((start, end));
}
fn copy_bbs_to_vcode(&mut self) {
for &(start, end) in self.block_ranges.iter().rev() {
for &InstTuple {
loc,
is_safepoint,
ref inst,
} in &self.block_insts[start..end]
{
self.vcode.set_srcloc(loc);
self.vcode.push(inst.clone(), is_safepoint);
}
self.vcode.end_bb();
}
self.vcode.end_bb();
}
fn lower_clif_branches<B: LowerBackend<MInst = I>>(
@@ -900,9 +761,28 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
backend.lower_branch_group(self, branches, targets)?;
let loc = self.srcloc(branches[0]);
self.finish_ir_inst(loc);
// Add block param outputs for current block.
self.lower_branch_blockparam_args(block);
Ok(())
}
fn lower_branch_blockparam_args(&mut self, block: Block) {
visit_block_succs(self.f, block, |inst, _succ| {
let branch_args = self.f.dfg.inst_variable_args(inst);
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
for &arg in branch_args {
let arg = self.f.dfg.resolve_aliases(arg);
let regs = self.put_value_in_regs(arg);
for &vreg in regs.regs() {
let vreg = self.vcode.resolve_vreg_alias(vreg.into());
branch_arg_vregs.push(vreg.into());
}
}
self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
});
self.finish_ir_inst(SourceLoc::default());
}
fn collect_branches_and_targets(
&self,
bindex: BlockIndex,
@@ -927,10 +807,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
/// Lower the function.
pub fn lower<B: LowerBackend<MInst = I>>(
mut self,
backend: &B,
) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
log::trace!("about to lower function: {:?}", self.f);
// Initialize the ABI object, giving it a temp if requested.
@@ -945,7 +822,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
// not the whole `Lower` impl).
self.pinned_reg = backend.maybe_pinned_reg();
self.vcode.set_entry(0);
self.vcode.set_entry(BlockIndex::new(0));
// Reused vectors for branch lowering.
let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
@@ -963,7 +840,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
// Main lowering loop over lowered blocks.
for (bindex, lb) in lowered_order.iter().enumerate().rev() {
let bindex = bindex as BlockIndex;
let bindex = BlockIndex::new(bindex);
// Lower the block body in reverse order (see comment in
// `lower_clif_block()` for rationale).
@@ -976,30 +853,41 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
self.finish_ir_inst(self.srcloc(branches[0]));
}
} else {
// If no orig block, this must be a pure edge block; get the successor and
// emit a jump.
// If no orig block, this must be a pure edge block;
// get the successor and emit a jump. Add block params
// according to the one successor, and pass them
// through; note that the successor must have an
// original block.
let (_, succ) = self.vcode.block_order().succ_indices(bindex)[0];
let orig_succ = lowered_order[succ.index()];
let orig_succ = orig_succ
.orig_block()
.expect("Edge block succ must be body block");
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
for ty in self.f.dfg.block_param_types(orig_succ) {
let regs = alloc_vregs(ty, &mut self.next_vreg, &mut self.vcode)?;
for &reg in regs.regs() {
branch_arg_vregs.push(reg);
let vreg = reg.to_virtual_reg().unwrap();
self.vcode
.add_block_param(vreg, self.vcode.get_vreg_type(vreg));
}
}
self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
self.emit(I::gen_jump(MachLabel::from_block(succ)));
self.finish_ir_inst(SourceLoc::default());
}
// Out-edge phi moves.
if let Some((pred, inst, succ)) = lb.out_edge() {
self.lower_edge(pred, inst, succ)?;
self.finish_ir_inst(SourceLoc::default());
}
// Original block body.
if let Some(bb) = lb.orig_block() {
self.lower_clif_block(backend, bb)?;
self.emit_value_label_markers_for_block_args(bb);
}
// In-edge phi moves.
if let Some((pred, inst, succ)) = lb.in_edge() {
self.lower_edge(pred, inst, succ)?;
self.finish_ir_inst(SourceLoc::default());
}
if bindex == 0 {
if bindex.index() == 0 {
// Set up the function with arg vreg inits.
self.gen_arg_setup();
self.finish_ir_inst(SourceLoc::default());
@@ -1008,13 +896,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
self.finish_bb();
}
self.copy_bbs_to_vcode();
// Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
let (vcode, stack_map_info) = self.vcode.build();
// Now that we've emitted all instructions into the
// VCodeBuilder, let's build the VCode.
let vcode = self.vcode.build();
log::trace!("built vcode: {:?}", vcode);
Ok((vcode, stack_map_info))
Ok(vcode)
}
}
@@ -1278,19 +1165,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
}
fn emit(&mut self, mach_inst: I) {
self.ir_insts.push(InstTuple {
loc: SourceLoc::default(),
is_safepoint: false,
inst: mach_inst,
});
}
fn emit_safepoint(&mut self, mach_inst: I) {
self.ir_insts.push(InstTuple {
loc: SourceLoc::default(),
is_safepoint: true,
inst: mach_inst,
});
log::trace!("emit: {:?}", mach_inst);
self.ir_insts.push(mach_inst);
}
fn sink_inst(&mut self, ir_inst: Inst) {
@@ -1336,7 +1212,7 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
}
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
if reg.is_virtual() {
if reg.to_virtual_reg().is_some() {
reg
} else {
let new_reg = self.alloc_tmp(ty).only_reg().unwrap();
@@ -1344,6 +1220,11 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
new_reg.to_reg()
}
}
fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
log::trace!("set vreg alias: from {:?} to {:?}", from, to);
self.vcode.set_vreg_alias(from, to);
}
}
/// Visit all successors of a block with a given visitor closure.

View File

@@ -8,14 +8,10 @@
//!
//! The container for machine instructions, at various stages of construction,
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
//! into basic blocks as "vcode". This is short for "virtual-register code", though
//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
//! real registers. Nevertheless, the name is catchy and we like it.
//! into basic blocks as "vcode". This is short for "virtual-register code".
//!
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
//! you like by machine-independent optimization passes) onward, is as follows.
//! (N.B.: though we show the VCode separately at each stage, the passes
//! mutate the VCode in place; these are not separate copies of the code.)
//!
//! ```plain
//!
@@ -31,37 +27,25 @@
//! | with unknown offsets.
//! | - critical edges (actually all edges)
//! | are split.)
//! | [regalloc]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - all real registers.
//! | - new instruction sequence returned
//! | out-of-band in RegAllocResult.
//! | - instruction sequence has spills,
//! | reloads, and moves inserted.
//! | - other invariants same as above.)
//! | [regalloc --> `regalloc2::Output`; VCode is unchanged]
//! |
//! | [preamble/postamble]
//! | [binary emission via MachBuffer]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - stack-frame size known.
//! | - out-of-band instruction sequence
//! | has preamble prepended to entry
//! | block, and postamble injected before
//! | every return instruction.
//! | - all symbolic stack references to
//! | stackslots and spillslots are resolved
//! | to concrete FP-offset mem addresses.)
//! |
//! | [binary emission via MachBuffer
//! | with streaming branch resolution/simplification]
//! |
//! Vec<u8> (machine code!)
//! Vec<u8> (machine code:
//! | - two-dest branches resolved via
//! | streaming branch resolution/simplification.
//! | - regalloc `Allocation` results used directly
//! | by instruction emission code.
//! | - prologue and epilogue(s) built and emitted
//! | directly during emission.
//! | - nominal-SP-relative offsets resolved
//! | by tracking EmitState.)
//!
//! ```
use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap};
use crate::ir::{SourceLoc, StackSlot, Type, ValueLabel};
use crate::ir::{SourceLoc, StackSlot, Type};
use crate::result::CodegenResult;
use crate::settings::Flags;
use crate::value_label::ValueLabelsRanges;
@@ -69,10 +53,7 @@ use alloc::boxed::Box;
use alloc::vec::Vec;
use core::fmt::Debug;
use cranelift_entity::PrimaryMap;
use regalloc::RegUsageCollector;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use regalloc2::{Allocation, VReg};
use smallvec::{smallvec, SmallVec};
use std::string::String;
@@ -98,20 +79,15 @@ pub use helpers::*;
pub mod inst_common;
pub use inst_common::*;
pub mod valueregs;
pub use reg::*;
pub use valueregs::*;
pub mod debug;
pub use regmapping::*;
pub mod regmapping;
pub mod reg;
/// A machine instruction.
pub trait MachInst: Clone + Debug {
/// Return the registers referenced by this machine instruction along with
/// the modes of reference (use, def, modify).
fn get_regs(&self, collector: &mut RegUsageCollector);
/// Map virtual registers to physical registers using the given virt->phys
/// maps corresponding to the program points prior to, and after, this instruction.
fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>);
/// If this is a simple move, return the (source, destination) tuple of registers.
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
@@ -128,11 +104,6 @@ pub trait MachInst: Clone + Debug {
true
}
/// If this is a load or store to the stack, return that info.
fn stack_op_info(&self) -> Option<MachInstStackOpInfo> {
None
}
/// Generate a move.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
@@ -144,10 +115,9 @@ pub trait MachInst: Clone + Debug {
alloc_tmp: F,
) -> SmallVec<[Self; 4]>;
/// Possibly operate on a value directly in a spill-slot rather than a
/// register. Useful if the machine has register-memory instruction forms
/// (e.g., add directly from or directly to memory), like x86.
fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;
/// Generate a dummy instruction that will keep a value alive but
/// has no other purpose.
fn gen_dummy_use(reg: Reg) -> Self;
/// Determine register class(es) to store the given Cranelift type, and the
/// Cranelift type actually stored in the underlying register(s). May return
@@ -163,6 +133,13 @@ pub trait MachInst: Clone + Debug {
/// generating spills and reloads for individual registers.
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>;
/// Get an appropriate type that can fully hold a value in a given
/// register class. This may not be the only type that maps to
/// that class, but when used with `gen_move()` or the ABI trait's
/// load/spill constructors, it should produce instruction(s) that
/// move the entire register contents.
fn canonical_type_for_rc(rc: RegClass) -> Type;
/// Generate a jump to another target. Used during lowering of
/// control flow.
fn gen_jump(target: MachLabel) -> Self;
@@ -187,16 +164,8 @@ pub trait MachInst: Clone + Debug {
/// be dependent on compilation flags.
fn ref_type_regclass(_flags: &Flags) -> RegClass;
/// Does this instruction define a ValueLabel? Returns the `Reg` whose value
/// becomes the new value of the `ValueLabel` after this instruction.
fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
None
}
/// Create a marker instruction that defines a value label.
fn gen_value_label_marker(_label: ValueLabel, _reg: Reg) -> Self {
Self::gen_nop(0)
}
/// Is this a safepoint?
fn is_safepoint(&self) -> bool;
/// A label-use kind: a type that describes the types of label references that
/// can occur in an instruction.
@@ -266,35 +235,6 @@ pub enum MachTerminator<'a> {
Indirect(&'a [MachLabel]),
}
impl<'a> MachTerminator<'a> {
/// Get the successor labels named in a `MachTerminator`.
pub fn get_succs(&self) -> SmallVec<[MachLabel; 2]> {
let mut ret = smallvec![];
match self {
&MachTerminator::Uncond(l) => {
ret.push(l);
}
&MachTerminator::Cond(l1, l2) => {
ret.push(l1);
ret.push(l2);
}
&MachTerminator::Indirect(ls) => {
ret.extend(ls.iter().cloned());
}
_ => {}
}
ret
}
/// Is this a terminator?
pub fn is_term(&self) -> bool {
match self {
MachTerminator::None => false,
_ => true,
}
}
}
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
/// Persistent state carried across `emit` invocations.
@@ -302,9 +242,15 @@ pub trait MachInstEmit: MachInst {
/// Constant information used in `emit` invocations.
type Info;
/// Emit the instruction.
fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);
fn emit(
&self,
allocs: &[Allocation],
code: &mut MachBuffer<Self>,
info: &Self::Info,
state: &mut Self::State,
);
/// Pretty-print the instruction.
fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String;
fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String;
}
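// A minimal sketch (an assumption for illustration, not the actual emission
// driver in this change) of how the new `emit` signature is used: regalloc2's
// results stay out-of-band, and each instruction receives its own slice of
// `Allocation`s at emission time rather than having its registers rewritten
// in place.
fn emit_all<I: MachInstEmit>(
    insts: &[I],
    allocs_per_inst: &[&[Allocation]],
    buffer: &mut MachBuffer<I>,
    info: &I::Info,
    state: &mut I::State,
) {
    for (inst, allocs) in insts.iter().zip(allocs_per_inst.iter().copied()) {
        // Allocations are consumed in the same order the operands were
        // collected by `get_operands()`.
        inst.emit(allocs, buffer, info, state);
    }
}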
/// A trait describing the emission state carried between MachInsts when
@@ -409,15 +355,3 @@ pub enum UnwindInfoKind {
#[cfg(feature = "unwind")]
Windows,
}
/// Info about an operation that loads or stores from/to the stack.
#[derive(Clone, Copy, Debug)]
pub enum MachInstStackOpInfo {
/// Load from an offset from the nominal stack pointer into the given reg.
LoadNomSPOff(Reg, i64),
/// Store to an offset from the nominal stack pointer from the given reg.
StoreNomSPOff(Reg, i64),
/// Adjustment of nominal-SP up or down. This value is added to subsequent
/// offsets in loads/stores above to produce real-SP offsets.
NomSPAdj(i64),
}

View File

@@ -0,0 +1,504 @@
//! Definitions for registers, operands, etc. Provides a thin
//! interface over the register allocator so that we can more easily
//! swap it out or shim it when necessary.
use crate::machinst::MachInst;
use alloc::{string::String, vec::Vec};
use core::{fmt::Debug, hash::Hash};
use regalloc2::{Allocation, Operand, PReg, VReg};
use smallvec::{smallvec, SmallVec};
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// The first 128 vregs (64 int, 64 float/vec) are "pinned" to
/// physical registers: this means that they are always constrained to
/// the corresponding register at all use/mod/def sites.
///
/// Arbitrary vregs can also be constrained to physical registers at
/// particular use/def/mod sites, and this is preferable; but pinned
/// vregs allow us to migrate code that has been written using
/// RealRegs directly.
const PINNED_VREGS: usize = 128;
/// Convert a `VReg` to its pinned `PReg`, if any.
pub fn pinned_vreg_to_preg(vreg: VReg) -> Option<PReg> {
if vreg.vreg() < PINNED_VREGS {
Some(PReg::from_index(vreg.vreg()))
} else {
None
}
}
/// Give the index of the first available vreg for generated code
/// (i.e., after all pinned vregs).
pub fn first_user_vreg_index() -> usize {
// This is just the constant defined above, but we keep the
// constant private and expose only this helper function with the
// specific name in order to ensure other parts of the code don't
// open-code and depend on the index-space scheme.
PINNED_VREGS
}
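// A small illustrative check (not part of this change) of the index-space
// scheme described above: vreg indices below the pinned range name physical
// registers directly, while lowering hands out fresh vregs starting at
// `first_user_vreg_index()`.
fn pinned_index_space_example() {
    // Index 3 is within the pinned range, so it names a physical register.
    let pinned = VReg::new(3, RegClass::Int);
    assert!(pinned_vreg_to_preg(pinned).is_some());
    // The first user index is not pinned; it is an ordinary virtual register.
    let user = VReg::new(first_user_vreg_index(), RegClass::Int);
    assert!(pinned_vreg_to_preg(user).is_none());
}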
/// A register named in an instruction. This register can be either a
/// virtual register or a fixed physical register. It does not have
/// any constraints applied to it: those can be added later in
/// `MachInst::get_operands()` when the `Reg`s are converted to
/// `Operand`s.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Reg(VReg);
impl Reg {
/// Get the physical register (`RealReg`), if this register is
/// one.
pub fn to_real_reg(self) -> Option<RealReg> {
if pinned_vreg_to_preg(self.0).is_some() {
Some(RealReg(self.0))
} else {
None
}
}
/// Get the virtual (non-physical) register, if this register is
/// one.
pub fn to_virtual_reg(self) -> Option<VirtualReg> {
if pinned_vreg_to_preg(self.0).is_none() {
Some(VirtualReg(self.0))
} else {
None
}
}
/// Get the class of this register.
pub fn class(self) -> RegClass {
self.0.class()
}
/// Is this a real (physical) reg?
pub fn is_real(self) -> bool {
self.to_real_reg().is_some()
}
/// Is this a virtual reg?
pub fn is_virtual(self) -> bool {
self.to_virtual_reg().is_some()
}
}
impl std::fmt::Debug for Reg {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if let Some(rreg) = self.to_real_reg() {
let preg: PReg = rreg.into();
write!(f, "{}", preg)
} else if let Some(vreg) = self.to_virtual_reg() {
let vreg: VReg = vreg.into();
write!(f, "{}", vreg)
} else {
unreachable!()
}
}
}
/// A real (physical) register. This corresponds to one of the target
/// ISA's named registers and can be used as an instruction operand.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct RealReg(VReg);
impl RealReg {
/// Get the class of this register.
pub fn class(self) -> RegClass {
self.0.class()
}
/// Get the hardware encoding of this register, as used in machine-code
/// encodings by the target ISA.
pub fn hw_enc(self) -> u8 {
PReg::from(self).hw_enc() as u8
}
}
impl std::fmt::Debug for RealReg {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
Reg::from(*self).fmt(f)
}
}
/// A virtual register. This can be allocated into a real (physical)
/// register of the appropriate register class, but which one is not
/// specified. Virtual registers are used when generating `MachInst`s,
/// before register allocation occurs, in order to allow us to name as
/// many register-carried values as necessary.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct VirtualReg(VReg);
impl VirtualReg {
/// Get the class of this register.
pub fn class(self) -> RegClass {
self.0.class()
}
/// Get this virtual register's index in the vreg index space.
pub fn index(self) -> usize {
self.0.vreg()
}
}
impl std::fmt::Debug for VirtualReg {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
Reg::from(*self).fmt(f)
}
}
/// A type wrapper that indicates a register type is writable. The
/// underlying register can be extracted, and the type wrapper can be
/// built using an arbitrary register. Hence, this type-level wrapper
/// is not strictly a guarantee. However, "casting" to a writable
/// register is an explicit operation for which we can
/// audit. Ordinarily, internal APIs in the compiler backend should
/// take a `Writable<Reg>` whenever the register is written, and the
/// usual, frictionless way to get one of these is to allocate a new
/// temporary.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Writable<T: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash> {
reg: T,
}
impl<T: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash> Writable<T> {
/// Explicitly construct a `Writable<T>` from a `T`. As noted in
/// the documentation for `Writable`, this is not hidden or
/// disallowed from the outside; anyone can perform the "cast";
/// but it is explicit so that we can audit the use sites.
pub fn from_reg(reg: T) -> Writable<T> {
Writable { reg }
}
/// Get the underlying register, which can be read.
pub fn to_reg(self) -> T {
self.reg
}
/// Map the underlying register to another value or type.
pub fn map<U, F>(self, f: F) -> Writable<U>
where
U: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash,
F: Fn(T) -> U,
{
Writable { reg: f(self.reg) }
}
}
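// A tiny illustration (not from this change) of the convention described
// above: outputs are passed around as `Writable<Reg>`, built explicitly with
// `from_reg`, and read back with `to_reg`.
fn writable_example(tmp: Reg) -> Reg {
    let dst: Writable<Reg> = Writable::from_reg(tmp);
    // Instruction constructors would take `dst` for their written operands.
    dst.to_reg()
}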
// Conversions between regalloc2 types (VReg) and our types
// (VirtualReg, RealReg, Reg).
impl std::convert::From<regalloc2::VReg> for Reg {
fn from(vreg: regalloc2::VReg) -> Reg {
Reg(vreg)
}
}
impl std::convert::From<regalloc2::VReg> for VirtualReg {
fn from(vreg: regalloc2::VReg) -> VirtualReg {
debug_assert!(pinned_vreg_to_preg(vreg).is_none());
VirtualReg(vreg)
}
}
impl std::convert::From<regalloc2::VReg> for RealReg {
fn from(vreg: regalloc2::VReg) -> RealReg {
debug_assert!(pinned_vreg_to_preg(vreg).is_some());
RealReg(vreg)
}
}
impl std::convert::From<Reg> for regalloc2::VReg {
/// Extract the underlying `regalloc2::VReg`. Note that physical
/// registers also map to particular (special) VRegs, so this
/// method can be used either on virtual or physical `Reg`s.
fn from(reg: Reg) -> regalloc2::VReg {
reg.0
}
}
impl std::convert::From<VirtualReg> for regalloc2::VReg {
fn from(reg: VirtualReg) -> regalloc2::VReg {
reg.0
}
}
impl std::convert::From<RealReg> for regalloc2::VReg {
fn from(reg: RealReg) -> regalloc2::VReg {
reg.0
}
}
impl std::convert::From<RealReg> for regalloc2::PReg {
fn from(reg: RealReg) -> regalloc2::PReg {
PReg::from_index(reg.0.vreg())
}
}
impl std::convert::From<regalloc2::PReg> for RealReg {
fn from(preg: regalloc2::PReg) -> RealReg {
RealReg(VReg::new(preg.index(), preg.class()))
}
}
impl std::convert::From<regalloc2::PReg> for Reg {
fn from(preg: regalloc2::PReg) -> Reg {
Reg(VReg::new(preg.index(), preg.class()))
}
}
impl std::convert::From<RealReg> for Reg {
fn from(reg: RealReg) -> Reg {
Reg(reg.0)
}
}
impl std::convert::From<VirtualReg> for Reg {
fn from(reg: VirtualReg) -> Reg {
Reg(reg.0)
}
}
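// A brief illustration (not part of this change) of the conversions above: a
// `PReg` round-trips through `RealReg` and `Reg`, and the pinned-vreg scheme
// lets the generic `Reg` predicates recognize it as a real register.
fn conversion_example() {
    let preg = PReg::from_index(3);
    let rreg = RealReg::from(preg);
    let reg = Reg::from(rreg);
    assert!(reg.is_real() && !reg.is_virtual());
    assert_eq!(PReg::from(reg.to_real_reg().unwrap()), preg);
}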
/// A spill slot.
pub type SpillSlot = regalloc2::SpillSlot;
/// A register class. Each register in the ISA has one class, and the
/// classes are disjoint. Most modern ISAs will have just two classes:
/// the integer/general-purpose registers (GPRs), and the float/vector
/// registers (typically used for both).
///
/// Note that unlike some other compiler backend/register allocator
/// designs, we do not allow for overlapping classes, i.e. registers
/// that belong to more than one class, because doing so makes the
/// allocation problem significantly more complex. Instead, when a
/// register can be addressed under different names for different
/// sizes (for example), the backend author should pick classes that
/// denote some fundamental allocation unit that encompasses the whole
/// register. For example, always allocate 128-bit vector registers
/// `v0`..`vN`, even though `f32` and `f64` values may use only the
/// low 32/64 bits of those registers and name them differently.
pub type RegClass = regalloc2::RegClass;
/// An OperandCollector is a wrapper around a Vec of Operands
/// (flattened array for a whole sequence of instructions) that
/// gathers operands from a single instruction and provides the range
/// in the flattened array.
#[derive(Debug)]
pub struct OperandCollector<'a, F: Fn(VReg) -> VReg> {
operands: &'a mut Vec<Operand>,
operands_start: usize,
clobbers: Vec<PReg>,
renamer: F,
}
impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> {
/// Start gathering operands into one flattened operand array.
pub fn new(operands: &'a mut Vec<Operand>, renamer: F) -> Self {
let operands_start = operands.len();
Self {
operands,
operands_start,
clobbers: vec![],
renamer,
}
}
/// Add an operand.
fn add_operand(&mut self, operand: Operand) {
let vreg = (self.renamer)(operand.vreg());
let operand = Operand::new(vreg, operand.constraint(), operand.kind(), operand.pos());
self.operands.push(operand);
}
/// Add a clobber.
fn add_clobber(&mut self, clobber: PReg) {
self.clobbers.push(clobber);
}
/// Finish the operand collection and return the tuple giving the
/// range of indices in the flattened operand array, and the
/// clobber array.
pub fn finish(self) -> ((u32, u32), Vec<PReg>) {
let start = self.operands_start as u32;
let end = self.operands.len() as u32;
((start, end), self.clobbers)
}
/// Add a register use, at the start of the instruction (`Before`
/// position).
pub fn reg_use(&mut self, reg: Reg) {
self.add_operand(Operand::reg_use(reg.into()));
}
/// Add multiple register uses.
pub fn reg_uses(&mut self, regs: &[Reg]) {
for &reg in regs {
self.reg_use(reg);
}
}
/// Add a register def, at the end of the instruction (`After`
/// position). Use only when this def will be written after all
/// uses are read.
pub fn reg_def(&mut self, reg: Writable<Reg>) {
self.add_operand(Operand::reg_def(reg.to_reg().into()));
}
/// Add multiple register defs.
pub fn reg_defs(&mut self, regs: &[Writable<Reg>]) {
for &reg in regs {
self.reg_def(reg);
}
}
/// Add a register "early def", which logically occurs at the
/// beginning of the instruction, alongside all uses. Use this
/// when the def may be written before all uses are read; the
/// regalloc will ensure that it does not overwrite any uses.
pub fn reg_early_def(&mut self, reg: Writable<Reg>) {
self.add_operand(Operand::reg_def_at_start(reg.to_reg().into()));
}
/// Add a register "fixed use", which ties a vreg to a particular
/// RealReg at this point.
pub fn reg_fixed_use(&mut self, reg: Reg, rreg: Reg) {
let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg");
self.add_operand(Operand::reg_fixed_use(reg.into(), rreg.into()));
}
/// Add a register "fixed def", which ties a vreg to a particular
/// RealReg at this point.
pub fn reg_fixed_def(&mut self, reg: Writable<Reg>, rreg: Reg) {
let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg");
self.add_operand(Operand::reg_fixed_def(reg.to_reg().into(), rreg.into()));
}
/// Add a register def that reuses an earlier use-operand's
/// allocation. The index of that earlier operand (relative to the
/// current instruction's start of operands) must be known.
pub fn reg_reuse_def(&mut self, reg: Writable<Reg>, idx: usize) {
if reg.to_reg().to_virtual_reg().is_some() {
self.add_operand(Operand::reg_reuse_def(reg.to_reg().into(), idx));
} else {
// Sometimes destination registers that reuse a source are
// given with RealReg args. In this case, we assume the
// creator of the instruction knows what they are doing
// and just emit a normal def to the pinned vreg.
self.add_operand(Operand::reg_def(reg.to_reg().into()));
}
}
/// Add a register use+def, or "modify", where the reg must stay
/// in the same register on the input and output side of the
/// instruction.
pub fn reg_mod(&mut self, reg: Writable<Reg>) {
self.add_operand(Operand::new(
reg.to_reg().into(),
regalloc2::OperandConstraint::Reg,
regalloc2::OperandKind::Mod,
regalloc2::OperandPos::Early,
));
}
/// Add a register clobber. This is a register that is written by
/// the instruction, so must be reserved (not used) for the whole
/// instruction, but is not used afterward.
#[allow(dead_code)] // FIXME: use clobbers rather than defs for calls!
pub fn reg_clobber(&mut self, reg: Writable<RealReg>) {
self.add_clobber(PReg::from(reg.to_reg()));
}
}
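// A hypothetical example (not from this change) of how a backend's
// `MachInst::get_operands` implementation drives the collector for a simple
// three-address instruction `dst = src1 op src2`: sources are uses, the
// destination is a def, and the collector handles renaming and flattening
// into the per-function operand array.
fn collect_operands_example<F: Fn(VReg) -> VReg>(
    collector: &mut OperandCollector<'_, F>,
    dst: Writable<Reg>,
    src1: Reg,
    src2: Reg,
) {
    collector.reg_use(src1);
    collector.reg_use(src2);
    collector.reg_def(dst);
}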
/// Use an OperandCollector to count the number of operands on an instruction.
pub fn count_operands<I: MachInst>(inst: &I) -> usize {
let mut ops = vec![];
let mut coll = OperandCollector::new(&mut ops, |vreg| vreg);
inst.get_operands(&mut coll);
let ((start, end), _) = coll.finish();
debug_assert_eq!(0, start);
end as usize
}
/// Pretty-print part of a disassembly, with knowledge of
/// operand/instruction size, and optionally with regalloc
/// results. This can be used, for example, to print either `rax` or
/// `eax` for the register by those names on x86-64, depending on a
/// 64- or 32-bit context.
pub trait PrettyPrint {
fn pretty_print(&self, size_bytes: u8, allocs: &mut AllocationConsumer<'_>) -> String;
fn pretty_print_default(&self) -> String {
self.pretty_print(0, &mut AllocationConsumer::new(&[]))
}
}
/// A consumer of an (optional) list of Allocations along with Regs
/// that provides RealRegs where available.
///
/// This is meant to be used during code emission or
/// pretty-printing. In at least the latter case, regalloc results may
/// or may not be available, so we may end up printing either vregs or
/// rregs. Even pre-regalloc, though, some registers may be RealRegs
/// that were provided when the instruction was created.
///
/// This struct should be used in a specific way: when matching on an
/// instruction, provide it the Regs in the same order as they were
/// provided to the OperandCollector.
#[derive(Clone)]
pub struct AllocationConsumer<'a> {
allocs: std::slice::Iter<'a, Allocation>,
}
impl<'a> AllocationConsumer<'a> {
pub fn new(allocs: &'a [Allocation]) -> Self {
Self {
allocs: allocs.iter(),
}
}
/// Take the next allocation, if any, and convert it to a `Reg`; fall back
/// to the pre-regalloc register (which may itself be a `RealReg`) when no
/// allocation is available.
pub fn next(&mut self, pre_regalloc_reg: Reg) -> Reg {
let alloc = self.allocs.next();
let alloc = alloc.map(|alloc| {
Reg::from(
alloc
.as_reg()
.expect("Should not have gotten a stack allocation"),
)
});
match (pre_regalloc_reg.to_real_reg(), alloc) {
(Some(rreg), None) => rreg.into(),
(Some(rreg), Some(alloc)) => {
debug_assert_eq!(Reg::from(rreg), alloc);
alloc
}
(None, Some(alloc)) => alloc,
_ => pre_regalloc_reg,
}
}
pub fn next_writable(&mut self, pre_regalloc_reg: Writable<Reg>) -> Writable<Reg> {
Writable::from_reg(self.next(pre_regalloc_reg.to_reg()))
}
pub fn next_n(&mut self, count: usize) -> SmallVec<[Allocation; 4]> {
let mut allocs = smallvec![];
for _ in 0..count {
if let Some(next) = self.allocs.next() {
allocs.push(*next);
} else {
return allocs;
}
}
allocs
}
}
impl<'a> std::default::Default for AllocationConsumer<'a> {
fn default() -> Self {
Self { allocs: [].iter() }
}
}
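// A sketch (hypothetical, not part of this change) of consuming allocations
// while pretty-printing or emitting: registers are fed to the consumer in the
// same order they were given to the `OperandCollector`, so each pre-regalloc
// `Reg` is replaced by the real register chosen for it (or kept as-is when no
// allocation is available).
fn print_add_example(dst: Writable<Reg>, src1: Reg, src2: Reg, allocs: &[Allocation]) -> String {
    let mut allocs = AllocationConsumer::new(allocs);
    let src1 = allocs.next(src1);
    let src2 = allocs.next(src2);
    let dst = allocs.next_writable(dst);
    format!("add {:?}, {:?}, {:?}", dst.to_reg(), src1, src2)
}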

View File

@@ -1,108 +0,0 @@
use crate::ir::Type;
use regalloc::{Reg, RegUsageMapper, Writable};
use smallvec::SmallVec;
use std::cell::Cell;
// Define our own register-mapping trait so we can do arbitrary register
// renaming that are more free form than what `regalloc` constrains us to with
// its `RegUsageMapper` trait definition.
pub trait RegMapper {
fn get_use(&self, reg: Reg) -> Option<Reg>;
fn get_def(&self, reg: Reg) -> Option<Reg>;
fn get_mod(&self, reg: Reg) -> Option<Reg>;
fn map_use(&self, r: &mut Reg) {
if let Some(new) = self.get_use(*r) {
*r = new;
}
}
fn map_def(&self, r: &mut Writable<Reg>) {
if let Some(new) = self.get_def(r.to_reg()) {
*r = Writable::from_reg(new);
}
}
fn map_mod(&self, r: &mut Writable<Reg>) {
if let Some(new) = self.get_mod(r.to_reg()) {
*r = Writable::from_reg(new);
}
}
}
impl<T> RegMapper for T
where
T: RegUsageMapper,
{
fn get_use(&self, reg: Reg) -> Option<Reg> {
let v = reg.as_virtual_reg()?;
self.get_use(v).map(|r| r.to_reg())
}
fn get_def(&self, reg: Reg) -> Option<Reg> {
let v = reg.as_virtual_reg()?;
self.get_def(v).map(|r| r.to_reg())
}
fn get_mod(&self, reg: Reg) -> Option<Reg> {
let v = reg.as_virtual_reg()?;
self.get_mod(v).map(|r| r.to_reg())
}
}
#[derive(Debug, Default)]
pub struct RegRenamer {
// Map of `(old, new, used, ty)` register names. Use a `SmallVec` because
// we typically only have one or two renamings.
//
// The `used` flag indicates whether the mapping has been used for
// `get_def`, later used afterwards during `unmapped_defs` to know what
// moves need to be generated.
renames: SmallVec<[(Reg, Reg, Cell<bool>, Type); 2]>,
}
impl RegRenamer {
/// Adds a new mapping which means that `old` reg should now be called
/// `new`. The type of `old` is `ty` as specified.
pub fn add_rename(&mut self, old: Reg, new: Reg, ty: Type) {
self.renames.push((old, new, Cell::new(false), ty));
}
fn get_rename(&self, reg: Reg, set_used_def: bool) -> Option<Reg> {
let (_, new, used_def, _) = self.renames.iter().find(|(old, _, _, _)| reg == *old)?;
used_def.set(used_def.get() || set_used_def);
Some(*new)
}
/// Returns the list of register mappings, with their type, which were not
/// actually mapped.
///
/// This list is used because it means that the `old` name for the register
/// was never actually defined, so to correctly rename this register the
/// caller needs to move `old` into `new`.
///
/// This yields tuples of `(old, new, ty)`.
pub fn unmapped_defs(&self) -> impl Iterator<Item = (Reg, Reg, Type)> + '_ {
self.renames.iter().filter_map(|(old, new, used_def, ty)| {
if used_def.get() {
None
} else {
Some((*old, *new, *ty))
}
})
}
}
impl RegMapper for RegRenamer {
fn get_use(&self, reg: Reg) -> Option<Reg> {
self.get_rename(reg, false)
}
fn get_def(&self, reg: Reg) -> Option<Reg> {
self.get_rename(reg, true)
}
fn get_mod(&self, reg: Reg) -> Option<Reg> {
self.get_rename(reg, false)
}
}

View File

@@ -1,7 +1,9 @@
//! Data structure for tracking the (possibly multiple) registers that hold one
//! SSA `Value`.
use regalloc::{RealReg, Reg, VirtualReg, Writable};
use regalloc2::{PReg, VReg};
use super::{RealReg, Reg, VirtualReg, Writable};
use std::fmt::Debug;
const VALUE_REGS_PARTS: usize = 2;
@@ -35,17 +37,17 @@ pub trait InvalidSentinel: Copy + Eq {
}
impl InvalidSentinel for Reg {
fn invalid_sentinel() -> Self {
Reg::invalid()
Reg::from(VReg::invalid())
}
}
impl InvalidSentinel for VirtualReg {
fn invalid_sentinel() -> Self {
VirtualReg::invalid()
VirtualReg::from(VReg::invalid())
}
}
impl InvalidSentinel for RealReg {
fn invalid_sentinel() -> Self {
RealReg::invalid()
RealReg::from(PReg::invalid())
}
}
impl InvalidSentinel for Writable<Reg> {

File diff suppressed because it is too large