diff --git a/Cargo.lock b/Cargo.lock index 11bb1a1770..adfcbd2876 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -529,7 +529,7 @@ dependencies = [ "hashbrown 0.9.1", "log", "miette", - "regalloc", + "regalloc2", "serde", "smallvec", "souper-ir", @@ -1219,6 +1219,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.5" @@ -2393,14 +2402,15 @@ dependencies = [ ] [[package]] -name = "regalloc" -version = "0.0.34" +name = "regalloc2" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62446b1d3ebf980bdc68837700af1d77b37bc430e524bf95319c6eada2a4cc02" +checksum = "3dd122b168f0046afcde717e002cdf76c9c87f829ae99dd12a02a0dcf7cc68f1" dependencies = [ + "fxhash", "log", - "rustc-hash", "serde", + "slice-group-by", "smallvec", ] @@ -2653,6 +2663,12 @@ dependencies = [ "rand_core 0.6.3", ] +[[package]] +name = "slice-group-by" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" + [[package]] name = "smallvec" version = "1.8.0" diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 602715f83e..51f39a3674 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -23,7 +23,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true } bincode = { version = "1.2.1", optional = true } gimli = { version = "0.26.0", default-features = false, features = ["write"], optional = true } smallvec = { version = "1.6.1" } -regalloc = "0.0.34" +regalloc2 = { version = "0.1.1", features = ["checker"] } souper-ir = { version = "2.1.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary @@ -77,14 +77,10 @@ all-arch = [ # For dependent crates that want to serialize some parts of cranelift enable-serde = [ "serde", - "regalloc/enable-serde", "cranelift-entity/enable-serde", + "regalloc2/enable-serde", ] -# Allow snapshotting regalloc test cases. Useful only to report bad register -# allocation failures, or for regalloc.rs developers. -regalloc-snapshot = ["bincode", "regalloc/enable-serde"] - # Enable support for the Souper harvester. souper-harvest = ["souper-ir", "souper-ir/stringify"] diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index b9fa89587a..58b7cd4499 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -3,39 +3,6 @@ use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; pub(crate) fn define() -> SettingGroup { let mut settings = SettingGroupBuilder::new("shared"); - settings.add_enum( - "regalloc", - "Register allocator to use with the MachInst backend.", - r#" - This selects the register allocator as an option among those offered by the `regalloc.rs` - crate. Please report register allocation bugs to the maintainers of this crate whenever - possible. - - Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this - means the x86_64 backend doesn't use this yet. 
- - Possible values: - - - `backtracking` is a greedy, backtracking register allocator as implemented in - Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but - it should generate better code in general, resulting in better throughput of generated - code. - - `backtracking_checked` is the backtracking allocator with additional self checks that may - take some time to run, and thus these checks are disabled by default. - - `experimental_linear_scan` is an experimental linear scan allocator. It may take less - time to allocate registers, but generated code's quality may be inferior. As of - 2020-04-17, it is still experimental and it should not be used in production settings. - - `experimental_linear_scan_checked` is the linear scan allocator with additional self - checks that may take some time to run, and thus these checks are disabled by default. - "#, - vec![ - "backtracking", - "backtracking_checked", - "experimental_linear_scan", - "experimental_linear_scan_checked", - ], - ); - settings.add_enum( "opt_level", "Optimization level for generated code.", diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 3fbfc83564..bc75ce85f3 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -14,7 +14,7 @@ use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because @@ -80,7 +80,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt &ir::ArgumentPurpose::VMContext => { // This is SpiderMonkey's `WasmTlsReg`. Some(ABIArg::reg( - xreg(BALDRDASH_TLS_REG).to_real_reg(), + xreg(BALDRDASH_TLS_REG).to_real_reg().unwrap(), ir::types::I64, param.extension, param.purpose, @@ -89,7 +89,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt &ir::ArgumentPurpose::SignatureId => { // This is SpiderMonkey's `WasmTableCallSigReg`. 
Some(ABIArg::reg( - xreg(BALDRDASH_SIG_REG).to_real_reg(), + xreg(BALDRDASH_SIG_REG).to_real_reg().unwrap(), ir::types::I64, param.extension, param.purpose, @@ -268,7 +268,7 @@ impl ABIMachineSpec for AArch64MachineDeps { let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?; if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { - assert!(rcs[0] == RegClass::I64); + assert!(rcs[0] == RegClass::Int); ret.push(param); continue; } @@ -313,7 +313,7 @@ impl ABIMachineSpec for AArch64MachineDeps { "Unable to handle multi reg params with more than 2 regs" ); assert!( - rcs == &[RegClass::I64, RegClass::I64], + rcs == &[RegClass::Int, RegClass::Int], "Unable to handle non i64 regs" ); @@ -335,12 +335,12 @@ impl ABIMachineSpec for AArch64MachineDeps { ret.push(ABIArg::Slots { slots: vec![ ABIArgSlot::Reg { - reg: lower_reg.to_real_reg(), + reg: lower_reg.to_real_reg().unwrap(), ty: param.value_type, extension: param.extension, }, ABIArgSlot::Reg { - reg: upper_reg.to_real_reg(), + reg: upper_reg.to_real_reg().unwrap(), ty: param.value_type, extension: param.extension, }, @@ -356,19 +356,17 @@ impl ABIMachineSpec for AArch64MachineDeps { // Single Register parameters let rc = rcs[0]; let next_reg = match rc { - RegClass::I64 => &mut next_xreg, - RegClass::V128 => &mut next_vreg, - _ => panic!("Invalid register class: {:?}", rc), + RegClass::Int => &mut next_xreg, + RegClass::Float => &mut next_vreg, }; if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 { let reg = match rc { - RegClass::I64 => xreg(*next_reg), - RegClass::V128 => vreg(*next_reg), - _ => unreachable!(), + RegClass::Int => xreg(*next_reg), + RegClass::Float => vreg(*next_reg), }; ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), param.value_type, param.extension, param.purpose, @@ -435,7 +433,7 @@ impl ABIMachineSpec for AArch64MachineDeps { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 { ret.push(ABIArg::reg( - xreg(next_xreg).to_real_reg(), + xreg(next_xreg).to_real_reg().unwrap(), I64, ir::ArgumentExtension::None, ir::ArgumentPurpose::Normal, @@ -505,8 +503,8 @@ impl ABIMachineSpec for AArch64MachineDeps { } } - fn gen_ret() -> Inst { - Inst::Ret + fn gen_ret(rets: Vec<Reg>) -> Inst { + Inst::Ret { rets } } fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> { @@ -708,7 +706,7 @@ impl ABIMachineSpec for AArch64MachineDeps { call_conv: isa::CallConv, setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec<Writable<RealReg>>, + clobbered_callee_saves: &[Writable<RealReg>], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { @@ -716,10 +714,9 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut clobbered_vec = vec![]; for &reg in clobbered_callee_saves.iter() { - match reg.to_reg().get_class() { - RegClass::I64 => clobbered_int.push(reg), - RegClass::V128 => clobbered_vec.push(reg), - class => panic!("Unexpected RegClass: {:?}", class), + match reg.to_reg().class() { + RegClass::Int => clobbered_int.push(reg), + RegClass::Float => clobbered_vec.push(reg), } } @@ -758,9 +755,9 @@ impl ABIMachineSpec for AArch64MachineDeps { let iter = clobbered_int.chunks_exact(2); if let [rd] = iter.remainder() { - let rd = rd.to_reg().to_reg(); + let rd: Reg = rd.to_reg().into(); - debug_assert_eq!(rd.get_class(), RegClass::I64); + debug_assert_eq!(rd.class(), RegClass::Int); // str rd, [sp, #-16]!
insts.push(Inst::Store64 { rd, @@ -776,7 +773,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rd.to_real_reg(), + reg: rd.to_real_reg().unwrap(), }, }); } @@ -785,12 +782,12 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut iter = iter.rev(); while let Some([rt, rt2]) = iter.next() { - // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg - let rt = rt.to_reg().to_reg(); - let rt2 = rt2.to_reg().to_reg(); + // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg + let rt: Reg = rt.to_reg().into(); + let rt2: Reg = rt2.to_reg().into(); - debug_assert!(rt.get_class() == RegClass::I64); - debug_assert!(rt2.get_class() == RegClass::I64); + debug_assert!(rt.class() == RegClass::Int); + debug_assert!(rt2.class() == RegClass::Int); // stp rt, rt2, [sp, #-16]! insts.push(Inst::StoreP64 { @@ -808,13 +805,13 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rt.to_real_reg(), + reg: rt.to_real_reg().unwrap(), }, }); insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32, - reg: rt2.to_real_reg(), + reg: rt2.to_real_reg().unwrap(), }, }); } @@ -844,9 +841,9 @@ impl ABIMachineSpec for AArch64MachineDeps { let iter = clobbered_vec.chunks_exact(2); if let [rd] = iter.remainder() { - let rd = rd.to_reg().to_reg(); + let rd: Reg = rd.to_reg().into(); - debug_assert_eq!(rd.get_class(), RegClass::V128); + debug_assert_eq!(rd.class(), RegClass::Float); insts.push(store_vec_reg(rd)); if flags.unwind_info() { @@ -854,7 +851,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rd.to_real_reg(), + reg: rd.to_real_reg().unwrap(), }, }); } @@ -896,11 +893,11 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut iter = iter.rev(); while let Some([rt, rt2]) = iter.next() { - let rt = rt.to_reg().to_reg(); - let rt2 = rt2.to_reg().to_reg(); + let rt: Reg = rt.to_reg().into(); + let rt2: Reg = rt2.to_reg().into(); - debug_assert_eq!(rt.get_class(), RegClass::V128); - debug_assert_eq!(rt2.get_class(), RegClass::V128); + debug_assert_eq!(rt.class(), RegClass::Float); + debug_assert_eq!(rt2.class(), RegClass::Float); let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2); @@ -911,13 +908,13 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rt.to_real_reg(), + reg: rt.to_real_reg().unwrap(), }, }); insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: clobber_offset + clobber_offset_change / 2, - reg: rt2.to_real_reg(), + reg: rt2.to_real_reg().unwrap(), }, }); } @@ -934,7 +931,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, flags: &settings::Flags, - clobbers: &Set<Writable<RealReg>>, + clobbers: &[Writable<RealReg>], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { @@ -994,31 +991,31 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut iter = clobbered_vec.chunks_exact(2); while let Some([rt, rt2]) = iter.next() { - let rt = rt.map(|r| r.to_reg()); - let rt2 = rt2.map(|r| r.to_reg()); + let rt: Writable<Reg> = rt.map(|r| r.into()); + let rt2: Writable<Reg> = rt2.map(|r| r.into()); - debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128); - debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128); + debug_assert_eq!(rt.to_reg().class(), RegClass::Float); +
debug_assert_eq!(rt2.to_reg().class(), RegClass::Float); insts.push(load_vec_reg_pair(rt, rt2)); } debug_assert!(iter.remainder().len() <= 1); if let [rd] = iter.remainder() { - let rd = rd.map(|r| r.to_reg()); + let rd: Writable<Reg> = rd.map(|r| r.into()); - debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128); + debug_assert_eq!(rd.to_reg().class(), RegClass::Float); insts.push(load_vec_reg(rd)); } let mut iter = clobbered_int.chunks_exact(2); while let Some([rt, rt2]) = iter.next() { - let rt = rt.map(|r| r.to_reg()); - let rt2 = rt2.map(|r| r.to_reg()); + let rt: Writable<Reg> = rt.map(|r| r.into()); + let rt2: Writable<Reg> = rt2.map(|r| r.into()); - debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64); - debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(rt.to_reg().class(), RegClass::Int); + debug_assert_eq!(rt2.to_reg().class(), RegClass::Int); // ldp rt, rt2, [sp], #16 insts.push(Inst::LoadP64 { rt, @@ -1034,9 +1031,9 @@ impl ABIMachineSpec for AArch64MachineDeps { debug_assert!(iter.remainder().len() <= 1); if let [rd] = iter.remainder() { - let rd = rd.map(|r| r.to_reg()); + let rd: Writable<Reg> = rd.map(|r| r.into()); - debug_assert_eq!(rd.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(rd.to_reg().class(), RegClass::Int); // ldr rd, [sp], #16 insts.push(Inst::ULoad64 { rd, @@ -1069,58 +1066,46 @@ impl ABIMachineSpec for AArch64MachineDeps { tmp: Writable<Reg>, callee_conv: isa::CallConv, caller_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { + ) -> SmallVec<[Inst; 2]> { let mut insts = SmallVec::new(); match &dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( - InstIsSafepoint::Yes, - Inst::Call { - info: Box::new(CallInfo { - dest: name.clone(), - uses, - defs, - opcode, - caller_callconv: caller_conv, - callee_callconv: callee_conv, - }), - }, - )), + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + }), + }), &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(( - InstIsSafepoint::No, - Inst::LoadExtName { - rd: tmp, - name: Box::new(name.clone()), - offset: 0, - }, - )); - insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { - info: Box::new(CallIndInfo { - rn: tmp.to_reg(), - uses, - defs, - opcode, - caller_callconv: caller_conv, - callee_callconv: callee_conv, - }), - }, - )); - } - &CallDest::Reg(reg) => insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { + insts.push(Inst::LoadExtName { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::CallInd { info: Box::new(CallIndInfo { - rn: *reg, + rn: tmp.to_reg(), uses, defs, opcode, caller_callconv: caller_conv, callee_callconv: callee_conv, }), - }, - )), + }); + } + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + }), + }), } insts @@ -1157,9 +1142,8 @@ impl ABIMachineSpec for AArch64MachineDeps { fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots.
match rc { - RegClass::I64 => 1, - RegClass::V128 => 2, - _ => panic!("Unexpected register class!"), + RegClass::Int => 1, + RegClass::Float => 2, } } @@ -1177,13 +1161,13 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut caller_saved = Vec::new(); for i in 0..29 { let x = writable_xreg(i); - if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg().unwrap()) { caller_saved.push(x); } } for i in 0..32 { let v = writable_vreg(i); - if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg().unwrap()) { caller_saved.push(v); } } @@ -1205,7 +1189,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, - regs: &Set<Writable<RealReg>>, + regs: &[Writable<RealReg>], ) -> Vec<Writable<RealReg>> { let mut regs: Vec<Writable<RealReg>> = regs .iter() @@ -1215,7 +1199,7 @@ impl ABIMachineSpec for AArch64MachineDeps { // Sort registers for deterministic code output. We can do an unstable // sort because the registers will be unique (there are no dups). - regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); regs } @@ -1247,29 +1231,27 @@ fn legal_type_for_machine(ty: Type) -> bool { /// callee-save? fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool { if call_conv.extends_baldrdash() { - match r.get_class() { - RegClass::I64 => { - let enc = r.get_hw_encoding(); - return BALDRDASH_JIT_CALLEE_SAVED_GPR[enc]; + match r.class() { + RegClass::Int => { + let enc = r.hw_enc() & 31; + return BALDRDASH_JIT_CALLEE_SAVED_GPR[enc as usize]; } - RegClass::V128 => { - let enc = r.get_hw_encoding(); - return BALDRDASH_JIT_CALLEE_SAVED_FPU[enc]; + RegClass::Float => { + let enc = r.hw_enc() & 31; + return BALDRDASH_JIT_CALLEE_SAVED_FPU[enc as usize]; } - _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"), }; } - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // x19 - x28 inclusive are callee-saves. - r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28 + r.hw_enc() >= 19 && r.hw_enc() <= 28 } - RegClass::V128 => { + RegClass::Float => { // v8 - v15 inclusive are callee-saves. - r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15 + r.hw_enc() >= 8 && r.hw_enc() <= 15 } - _ => panic!("Unexpected RegClass") } } @@ -1278,53 +1260,51 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool { /// written by the function's body. fn get_regs_restored_in_epilogue( call_conv: isa::CallConv, - regs: &Set<Writable<RealReg>>, + regs: &[Writable<RealReg>], ) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) { let mut int_saves = vec![]; let mut vec_saves = vec![]; - for &reg in regs.iter() { + for &reg in regs { if is_reg_saved_in_prologue(call_conv, reg.to_reg()) { - match reg.to_reg().get_class() { - RegClass::I64 => int_saves.push(reg), - RegClass::V128 => vec_saves.push(reg), - _ => panic!("Unexpected RegClass"), + match reg.to_reg().class() { + RegClass::Int => int_saves.push(reg), + RegClass::Float => vec_saves.push(reg), } } } // Sort registers for deterministic code output. We can do an unstable sort because the // registers will be unique (there are no dups).
- int_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); - vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); + int_saves.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); + vec_saves.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); (int_saves, vec_saves) } fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool { if call_conv_of_callee.extends_baldrdash() { - match r.get_class() { - RegClass::I64 => { - let enc = r.get_hw_encoding(); - if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] { + match r.class() { + RegClass::Int => { + let enc = r.hw_enc() & 31; + if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc as usize] { return true; } // Otherwise, fall through to preserve native's ABI caller-saved. } - RegClass::V128 => { - let enc = r.get_hw_encoding(); - if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] { + RegClass::Float => { + let enc = r.hw_enc() & 31; + if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc as usize] { return true; } // Otherwise, fall through to preserve native's ABI caller-saved. } - _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"), }; } - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // x0 - x17 inclusive are caller-saves. - r.get_hw_encoding() <= 17 + r.hw_enc() <= 17 } - RegClass::V128 => { + RegClass::Float => { // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The // upper 64 bits of v8 - v15 inclusive are also caller-saves. // However, because we cannot currently represent partial registers @@ -1341,6 +1321,5 @@ fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> b // include them as defs here. true } - _ => panic!("Unexpected RegClass"), } } diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 734c268bb0..7392d02f77 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1,795 +1,794 @@ ;; Instruction formats. (type MInst - (enum - ;; A no-op of zero size. - (Nop0) + (enum + ;; A no-op of zero size. + (Nop0) - ;; A no-op that is one instruction large. - (Nop4) + ;; A no-op that is one instruction large. + (Nop4) - ;; An ALU operation with two register sources and a register destination. - (AluRRR - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (rm Reg)) + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg)) - ;; An ALU operation with three register sources and a register destination. - (AluRRRR - (alu_op ALUOp3) - (rd WritableReg) - (rn Reg) - (rm Reg) - (ra Reg)) + ;; An ALU operation with three register sources and a register destination. + (AluRRRR + (alu_op ALUOp3) + (rd WritableReg) + (rn Reg) + (rm Reg) + (ra Reg)) - ;; An ALU operation with a register source and an immediate-12 source, and a register - ;; destination. - (AluRRImm12 - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (imm12 Imm12)) + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. + (AluRRImm12 + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (imm12 Imm12)) - ;; An ALU operation with a register source and an immediate-logic source, and a register destination. 
- (AluRRImmLogic - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (imml ImmLogic)) + ;; An ALU operation with a register source and an immediate-logic source, and a register destination. + (AluRRImmLogic + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (imml ImmLogic)) - ;; An ALU operation with a register source and an immediate-shiftamt source, and a register destination. - (AluRRImmShift - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (immshift ImmShift)) + ;; An ALU operation with a register source and an immediate-shiftamt source, and a register destination. + (AluRRImmShift + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (immshift ImmShift)) - ;; An ALU operation with two register sources, one of which can be shifted, and a register - ;; destination. - (AluRRRShift - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (rm Reg) - (shiftop ShiftOpAndAmt)) + ;; An ALU operation with two register sources, one of which can be shifted, and a register + ;; destination. + (AluRRRShift + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (shiftop ShiftOpAndAmt)) - ;; An ALU operation with two register sources, one of which can be {zero,sign}-extended and - ;; shifted, and a register destination. - (AluRRRExtend - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (rm Reg) - (extendop ExtendOp)) + ;; An ALU operation with two register sources, one of which can be {zero,sign}-extended and + ;; shifted, and a register destination. + (AluRRRExtend + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (extendop ExtendOp)) - ;; A bit op instruction with a single register source. - (BitRR - (op BitOp) - (size OperandSize) - (rd WritableReg) - (rn Reg)) + ;; A bit op instruction with a single register source. + (BitRR + (op BitOp) + (size OperandSize) + (rd WritableReg) + (rn Reg)) - ;; An unsigned (zero-extending) 8-bit load. - (ULoad8 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; An unsigned (zero-extending) 8-bit load. + (ULoad8 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A signed (sign-extending) 8-bit load. - (SLoad8 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A signed (sign-extending) 8-bit load. + (SLoad8 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; An unsigned (zero-extending) 16-bit load. - (ULoad16 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; An unsigned (zero-extending) 16-bit load. + (ULoad16 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A signed (sign-extending) 16-bit load. - (SLoad16 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A signed (sign-extending) 16-bit load. + (SLoad16 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; An unsigned (zero-extending) 32-bit load. - (ULoad32 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; An unsigned (zero-extending) 32-bit load. + (ULoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A signed (sign-extending) 32-bit load. - (SLoad32 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A signed (sign-extending) 32-bit load. + (SLoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A 64-bit load. - (ULoad64 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A 64-bit load. + (ULoad64 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; An 8-bit store. - (Store8 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; An 8-bit store. 
+ (Store8 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A 16-bit store. - (Store16 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; A 16-bit store. + (Store16 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A 32-bit store. - (Store32 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; A 32-bit store. + (Store32 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A 64-bit store. - (Store64 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; A 64-bit store. + (Store64 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A store of a pair of registers. - (StoreP64 - (rt Reg) - (rt2 Reg) - (mem PairAMode) - (flags MemFlags)) + ;; A store of a pair of registers. + (StoreP64 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) - ;; A load of a pair of registers. - (LoadP64 - (rt WritableReg) - (rt2 WritableReg) - (mem PairAMode) - (flags MemFlags)) + ;; A load of a pair of registers. + (LoadP64 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) - ;; A MOV instruction. These are encoded as ORR's (AluRRR form) but we - ;; keep them separate at the `Inst` level for better pretty-printing - ;; and faster `is_move()` logic. - (Mov64 - (rd WritableReg) - (rm Reg)) + ;; A MOV instruction. These are encoded as ORR's (AluRRR form) but we + ;; keep them separate at the `Inst` level for better pretty-printing + ;; and faster `is_move()` logic. + (Mov64 + (rd WritableReg) + (rm Reg)) - ;; A 32-bit MOV. Zeroes the top 32 bits of the destination. This is - ;; effectively an alias for an unsigned 32-to-64-bit extension. - (Mov32 - (rd WritableReg) - (rm Reg)) + ;; A 32-bit MOV. Zeroes the top 32 bits of the destination. This is + ;; effectively an alias for an unsigned 32-to-64-bit extension. + (Mov32 + (rd WritableReg) + (rm Reg)) - ;; A MOVZ with a 16-bit immediate. - (MovZ - (rd WritableReg) - (imm MoveWideConst) - (size OperandSize)) + ;; A MOVZ with a 16-bit immediate. + (MovZ + (rd WritableReg) + (imm MoveWideConst) + (size OperandSize)) - ;; A MOVN with a 16-bit immediate. - (MovN - (rd WritableReg) - (imm MoveWideConst) - (size OperandSize)) + ;; A MOVN with a 16-bit immediate. + (MovN + (rd WritableReg) + (imm MoveWideConst) + (size OperandSize)) - ;; A MOVK with a 16-bit immediate. - (MovK - (rd WritableReg) - (imm MoveWideConst) - (size OperandSize)) + ;; A MOVK with a 16-bit immediate. + (MovK + (rd WritableReg) + (imm MoveWideConst) + (size OperandSize)) - ;; A sign- or zero-extend operation. - (Extend - (rd WritableReg) - (rn Reg) - (signed bool) - (from_bits u8) - (to_bits u8)) + ;; A sign- or zero-extend operation. + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) - ;; A conditional-select operation. - (CSel - (rd WritableReg) - (cond Cond) - (rn Reg) - (rm Reg)) + ;; A conditional-select operation. + (CSel + (rd WritableReg) + (cond Cond) + (rn Reg) + (rm Reg)) - ;; A conditional-set operation. - (CSet - (rd WritableReg) - (cond Cond)) + ;; A conditional-set operation. + (CSet + (rd WritableReg) + (cond Cond)) - ;; A conditional-set-mask operation. - (CSetm - (rd WritableReg) - (cond Cond)) + ;; A conditional-set-mask operation. + (CSetm + (rd WritableReg) + (cond Cond)) - ;; A conditional comparison with an immediate. - (CCmpImm - (size OperandSize) - (rn Reg) - (imm UImm5) - (nzcv NZCV) - (cond Cond)) + ;; A conditional comparison with an immediate. 
+ (CCmpImm + (size OperandSize) + (rn Reg) + (imm UImm5) + (nzcv NZCV) + (cond Cond)) - ;; A synthetic insn, which is a load-linked store-conditional loop, that has the overall - ;; effect of atomically modifying a memory location in a particular way. Because we have - ;; no way to explain to the regalloc about earlyclobber registers, this instruction has - ;; completely fixed operand registers, and we rely on the RA's coalescing to remove copies - ;; in the surrounding code to the extent it can. The sequence is both preceded and - ;; followed by a fence which is at least as comprehensive as that of the `Fence` - ;; instruction below. This instruction is sequentially consistent. The operand - ;; conventions are: - ;; - ;; x25 (rd) address - ;; x26 (rd) second operand for `op` - ;; x27 (wr) old value - ;; x24 (wr) scratch reg; value afterwards has no meaning - ;; x28 (wr) scratch reg; value afterwards has no meaning - (AtomicRMWLoop - (ty Type) ;; I8, I16, I32 or I64 - (op AtomicRmwOp)) - - ;; An atomic read-modify-write operation. These instructions require the - ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have - ;; acquire-release semantics. - (AtomicRMW - (op AtomicRMWOp) - (rs Reg) - (rt WritableReg) - (rn Reg) - (ty Type)) - - ;; An atomic compare-and-swap operation. This instruction is sequentially consistent. - (AtomicCAS - (rs WritableReg) - (rt Reg) - (rn Reg) - (ty Type)) - - ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked - ;; store-conditional loop. - ;; This instruction is sequentially consistent. - ;; Note that the operand conventions, although very similar to AtomicRMWLoop, are different: - ;; - ;; x25 (rd) address - ;; x26 (rd) expected value - ;; x28 (rd) replacement value - ;; x27 (wr) old value - ;; x24 (wr) scratch reg; value afterwards has no meaning - (AtomicCASLoop + ;; A synthetic insn, which is a load-linked store-conditional loop, that has the overall + ;; effect of atomically modifying a memory location in a particular way. Because we have + ;; no way to explain to the regalloc about earlyclobber registers, this instruction has + ;; completely fixed operand registers, and we rely on the RA's coalescing to remove copies + ;; in the surrounding code to the extent it can. The sequence is both preceded and + ;; followed by a fence which is at least as comprehensive as that of the `Fence` + ;; instruction below. This instruction is sequentially consistent. The operand + ;; conventions are: + ;; + ;; x25 (rd) address + ;; x26 (rd) second operand for `op` + ;; x27 (wr) old value + ;; x24 (wr) scratch reg; value afterwards has no meaning + ;; x28 (wr) scratch reg; value afterwards has no meaning + (AtomicRMWLoop (ty Type) ;; I8, I16, I32 or I64 - ) - - ;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put - ;; it in `rn`, optionally zero-extending to fill a word or double word result. - ;; This instruction is sequentially consistent. - (LoadAcquire - (access_ty Type) ;; I8, I16, I32 or I64 - (rt WritableReg) - (rn Reg)) - - ;; Write the lowest `ty` bits of `rt` to address `rn`. - ;; This instruction is sequentially consistent. - (StoreRelease - (access_ty Type) ;; I8, I16, I32 or I64 - (rt Reg) - (rn Reg)) - - ;; A memory fence. This must provide ordering to ensure that, at a minimum, neither loads - ;; nor stores may move forwards or backwards across the fence. Currently emitted as "dmb - ;; ish". This instruction is sequentially consistent. - (Fence) - - ;; FPU move. 
Note that this is distinct from a vector-register - ;; move; moving just 64 bits seems to be significantly faster. - (FpuMove64 - (rd WritableReg) - (rn Reg)) - - ;; Vector register move. - (FpuMove128 - (rd WritableReg) - (rn Reg)) - - ;; Move to scalar from a vector element. - (FpuMoveFromVec - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize)) - - ;; Zero-extend a SIMD & FP scalar to the full width of a vector register. - ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). - (FpuExtend - (rd WritableReg) - (rn Reg) - (size ScalarSize)) - - ;; 1-op FPU instruction. - (FpuRR - (fpu_op FPUOp1) - (rd WritableReg) - (rn Reg)) - - ;; 2-op FPU instruction. - (FpuRRR - (fpu_op FPUOp2) - (rd WritableReg) - (rn Reg) - (rm Reg)) - - (FpuRRI - (fpu_op FPUOpRI) - (rd WritableReg) - (rn Reg)) - - ;; 3-op FPU instruction. - (FpuRRRR - (fpu_op FPUOp3) - (rd WritableReg) - (rn Reg) - (rm Reg) - (ra Reg)) - - ;; FPU comparison, single-precision (32 bit). - (FpuCmp32 - (rn Reg) - (rm Reg)) - - ;; FPU comparison, double-precision (64 bit). - (FpuCmp64 - (rn Reg) - (rm Reg)) - - ;; Floating-point load, single-precision (32 bit). - (FpuLoad32 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point store, single-precision (32 bit). - (FpuStore32 - (rd Reg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point load, double-precision (64 bit). - (FpuLoad64 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point store, double-precision (64 bit). - (FpuStore64 - (rd Reg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point/vector load, 128 bit. - (FpuLoad128 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point/vector store, 128 bit. - (FpuStore128 - (rd Reg) - (mem AMode) - (flags MemFlags)) - - ;; A load of a pair of floating-point registers, double precision (64-bit). - (FpuLoadP64 - (rt WritableReg) - (rt2 WritableReg) - (mem PairAMode) - (flags MemFlags)) - - ;; A store of a pair of floating-point registers, double precision (64-bit). - (FpuStoreP64 - (rt Reg) - (rt2 Reg) - (mem PairAMode) - (flags MemFlags)) - - ;; A load of a pair of floating-point registers, 128-bit. - (FpuLoadP128 - (rt WritableReg) - (rt2 WritableReg) - (mem PairAMode) - (flags MemFlags)) - - ;; A store of a pair of floating-point registers, 128-bit. - (FpuStoreP128 - (rt Reg) - (rt2 Reg) - (mem PairAMode) - (flags MemFlags)) - - (LoadFpuConst64 - (rd WritableReg) - (const_data u64)) - - (LoadFpuConst128 - (rd WritableReg) - (const_data u128)) - - ;; Conversion: FP -> integer. - (FpuToInt - (op FpuToIntOp) - (rd WritableReg) - (rn Reg)) - - ;; Conversion: integer -> FP. - (IntToFpu - (op IntToFpuOp) - (rd WritableReg) - (rn Reg)) - - ;; FP conditional select, 32 bit. - (FpuCSel32 - (rd WritableReg) - (rn Reg) - (rm Reg) - (cond Cond)) - - ;; FP conditional select, 64 bit. - (FpuCSel64 - (rd WritableReg) - (rn Reg) - (rm Reg) - (cond Cond)) - - ;; Round to integer. - (FpuRound - (op FpuRoundMode) - (rd WritableReg) - (rn Reg)) - - ;; Move from a GPR to a vector register. The scalar value is parked in the lowest lane - ;; of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit - ;; transactions are supported. - (MovToFpu - (rd WritableReg) - (rn Reg) - (size ScalarSize)) - - ;; Loads a floating-point immediate. - (FpuMoveFPImm - (rd WritableReg) - (imm ASIMDFPModImm) - (size ScalarSize)) - - ;; Move to a vector element from a GPR. 
- (MovToVec - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize)) - - ;; Unsigned move from a vector element to a GPR. - (MovFromVec - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize)) - - ;; Signed move from a vector element to a GPR. - (MovFromVecSigned - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize) - (scalar_size OperandSize)) - - ;; Duplicate general-purpose register to vector. - (VecDup - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Duplicate scalar to vector. - (VecDupFromFpu - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Duplicate FP immediate to vector. - (VecDupFPImm - (rd WritableReg) - (imm ASIMDFPModImm) - (size VectorSize)) - - ;; Duplicate immediate to vector. - (VecDupImm - (rd WritableReg) - (imm ASIMDMovModImm) - (invert bool) - (size VectorSize)) - - ;; Vector extend. - (VecExtend - (t VecExtendOp) - (rd WritableReg) - (rn Reg) - (high_half bool)) - - ;; Move vector element to another vector element. - (VecMovElement - (rd WritableReg) - (rn Reg) - (dest_idx u8) - (src_idx u8) - (size VectorSize)) - - ;; Vector widening operation. - (VecRRLong - (op VecRRLongOp) - (rd WritableReg) - (rn Reg) - (high_half bool)) - - ;; Vector narrowing operation. - (VecRRNarrow - (op VecRRNarrowOp) - (rd WritableReg) - (rn Reg) - (high_half bool)) - - ;; 1-operand vector instruction that operates on a pair of elements. - (VecRRPair - (op VecPairOp) - (rd WritableReg) - (rn Reg)) - - ;; 2-operand vector instruction that produces a result with twice the - ;; lane width and half the number of lanes. - (VecRRRLong - (alu_op VecRRRLongOp) - (rd WritableReg) - (rn Reg) - (rm Reg) - (high_half bool)) - - ;; 1-operand vector instruction that extends elements of the input - ;; register and operates on a pair of elements. The output lane width - ;; is double that of the input. - (VecRRPairLong - (op VecRRPairLongOp) - (rd WritableReg) - (rn Reg)) - - ;; A vector ALU op. - (VecRRR - (alu_op VecALUOp) - (rd WritableReg) - (rn Reg) - (rm Reg) - (size VectorSize)) - - ;; Vector two register miscellaneous instruction. - (VecMisc - (op VecMisc2) - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Vector instruction across lanes. - (VecLanes - (op VecLanesOp) - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Vector shift by immediate Shift Left (immediate), Unsigned Shift Right (immediate) - ;; Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts, - ;; the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero - ;; right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm` - ;; values from 0 to lane-size-in-bits - 1 inclusive. - (VecShiftImm - (op VecShiftImmOp) - (rd WritableReg) - (rn Reg) - (size VectorSize) - (imm u8)) - - ;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes - ;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`. - (VecExtract - (rd WritableReg) - (rn Reg) - (rm Reg) - (imm4 u8)) - - ;; Table vector lookup - single register table. The table consists of 8-bit elements and is - ;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether - ;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination - ;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them - ;; to 0. - (VecTbl - (rd WritableReg) - (rn Reg) - (rm Reg) - (is_extension bool)) - - ;; Table vector lookup - two register table. 
The table consists of 8-bit elements and is - ;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension` - ;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in - ;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified - ;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers - ;; modulo 32, that is v31 and v0 (in that order) are consecutive registers. - (VecTbl2 - (rd WritableReg) - (rn Reg) - (rn2 Reg) - (rm Reg) - (is_extension bool)) - - ;; Load an element and replicate to all lanes of a vector. - (VecLoadReplicate - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn - ;; control-flow diamond. - (VecCSel - (rd WritableReg) - (rn Reg) - (rm Reg) - (cond Cond)) - - ;; Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). - (MovToNZCV - (rn Reg)) - - ;; Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). - (MovFromNZCV - (rd WritableReg)) - - ;; A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation - ;; of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the - ;; code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit - ;; target. - (Call - (info BoxCallInfo)) - - ;; A machine indirect-call instruction. - (CallInd - (info BoxCallIndInfo)) - - ;; ---- branches (exactly one must appear at end of BB) ---- - - ;; A machine return instruction. - (Ret) - - ;; A placeholder instruction, generating no code, meaning that a function epilogue must be - ;; inserted there. - (EpiloguePlaceholder) - - ;; An unconditional branch. - (Jump - (dest BranchTarget)) - - ;; A conditional branch. Contains two targets; at emission time, both are emitted, but - ;; the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the - ;; choice of taken/not_taken (inverting the branch polarity as needed) based on the - ;; fallthrough at the time of lowering. - (CondBr - (taken BranchTarget) - (not_taken BranchTarget) - (kind CondBrKind)) - - ;; A conditional trap: execute a `udf` if the condition is true. This is - ;; one VCode instruction because it uses embedded control flow; it is - ;; logically a single-in, single-out region, but needs to appear as one - ;; unit to the register allocator. - ;; - ;; The `CondBrKind` gives the conditional-branch condition that will - ;; *execute* the embedded `Inst`. (In the emitted code, we use the inverse - ;; of this condition in a branch that skips the trap instruction.) - (TrapIf - (kind CondBrKind) - (trap_code TrapCode)) - - ;; An indirect branch through a register, augmented with set of all - ;; possible successors. - (IndirectBr - (rn Reg) - (targets VecMachLabel)) - - ;; A "break" instruction, used for e.g. traps and debug breakpoints. - (Brk) - - ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at - ;; runtime. - (Udf - (trap_code TrapCode)) - - ;; Compute the address (using a PC-relative offset) of a memory location, using the `ADR` - ;; instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is - ;; only used for now in fixed lowering sequences with hardcoded offsets. In the future we may - ;; need full `MemLabel` support. - (Adr - (rd WritableReg) - ;; Offset in range -2^20 .. 2^20. 
- (off i32)) - - ;; Raw 32-bit word, used for inline constants and jump-table entries. - (Word4 - (data u32)) - - ;; Raw 64-bit word, used for inline constants. - (Word8 - (data u64)) - - ;; Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale). - (JTSequence - (info BoxJTSequenceInfo) - (ridx Reg) - (rtmp1 WritableReg) - (rtmp2 WritableReg)) - - ;; Load an inline symbol reference. - (LoadExtName - (rd WritableReg) - (name BoxExternalName) - (offset i64)) - - ;; Load address referenced by `mem` into `rd`. - (LoadAddr - (rd WritableReg) - (mem AMode)) - - ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This - ;; controls how AMode::NominalSPOffset args are lowered. - (VirtualSPOffsetAdj - (offset i64)) - - ;; Meta-insn, no-op in generated code: emit constant/branch veneer island - ;; at this point (with a guard jump around it) if less than the needed - ;; space is available before the next branch deadline. See the `MachBuffer` - ;; implementation in `machinst/buffer.rs` for the overall algorithm. In - ;; brief, we retain a set of "pending/unresolved label references" from - ;; branches as we scan forward through instructions to emit machine code; - ;; if we notice we're about to go out of range on an unresolved reference, - ;; we stop, emit a bunch of "veneers" (branches in a form that has a longer - ;; range, e.g. a 26-bit-offset unconditional jump), and point the original - ;; label references to those. This is an "island" because it comes in the - ;; middle of the code. - ;; - ;; This meta-instruction is a necessary part of the logic that determines - ;; where to place islands. Ordinarily, we want to place them between basic - ;; blocks, so we compute the worst-case size of each block, and emit the - ;; island before starting a block if we would exceed a deadline before the - ;; end of the block. However, some sequences (such as an inline jumptable) - ;; are variable-length and not accounted for by this logic; so these - ;; lowered sequences include an `EmitIsland` to trigger island generation - ;; where necessary. - (EmitIsland - ;; The needed space before the next deadline. - (needed_space CodeOffset)) - - ;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol in x0. - (ElfTlsGetAddr - (symbol ExternalName)) - - ;; A definition of a value label. - (ValueLabelMarker - (reg Reg) - (label ValueLabel)) - - ;; An unwind pseudo-instruction. - (Unwind - (inst UnwindInst)) -)) + (op AtomicRmwOp)) + + ;; An atomic read-modify-write operation. These instructions require the + ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have + ;; acquire-release semantics. + (AtomicRMW + (op AtomicRMWOp) + (rs Reg) + (rt WritableReg) + (rn Reg) + (ty Type)) + + ;; An atomic compare-and-swap operation. This instruction is sequentially consistent. + (AtomicCAS + (rs WritableReg) + (rt Reg) + (rn Reg) + (ty Type)) + + ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked + ;; store-conditional loop. + ;; This instruction is sequentially consistent. 
+ ;; Note that the operand conventions, although very similar to AtomicRMWLoop, are different: + ;; + ;; x25 (rd) address + ;; x26 (rd) expected value + ;; x28 (rd) replacement value + ;; x27 (wr) old value + ;; x24 (wr) scratch reg; value afterwards has no meaning + (AtomicCASLoop + (ty Type) ;; I8, I16, I32 or I64 + ) + + ;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put + ;; it in `rn`, optionally zero-extending to fill a word or double word result. + ;; This instruction is sequentially consistent. + (LoadAcquire + (access_ty Type) ;; I8, I16, I32 or I64 + (rt WritableReg) + (rn Reg)) + + ;; Write the lowest `ty` bits of `rt` to address `rn`. + ;; This instruction is sequentially consistent. + (StoreRelease + (access_ty Type) ;; I8, I16, I32 or I64 + (rt Reg) + (rn Reg)) + + ;; A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + ;; nor stores may move forwards or backwards across the fence. Currently emitted as "dmb + ;; ish". This instruction is sequentially consistent. + (Fence) + + ;; FPU move. Note that this is distinct from a vector-register + ;; move; moving just 64 bits seems to be significantly faster. + (FpuMove64 + (rd WritableReg) + (rn Reg)) + + ;; Vector register move. + (FpuMove128 + (rd WritableReg) + (rn Reg)) + + ;; Move to scalar from a vector element. + (FpuMoveFromVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Zero-extend a SIMD & FP scalar to the full width of a vector register. + ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). + (FpuExtend + (rd WritableReg) + (rn Reg) + (size ScalarSize)) + + ;; 1-op FPU instruction. + (FpuRR + (fpu_op FPUOp1) + (rd WritableReg) + (rn Reg)) + + ;; 2-op FPU instruction. + (FpuRRR + (fpu_op FPUOp2) + (rd WritableReg) + (rn Reg) + (rm Reg)) + + (FpuRRI + (fpu_op FPUOpRI) + (rd WritableReg) + (rn Reg)) + + ;; 3-op FPU instruction. + (FpuRRRR + (fpu_op FPUOp3) + (rd WritableReg) + (rn Reg) + (rm Reg) + (ra Reg)) + + ;; FPU comparison, single-precision (32 bit). + (FpuCmp32 + (rn Reg) + (rm Reg)) + + ;; FPU comparison, double-precision (64 bit). + (FpuCmp64 + (rn Reg) + (rm Reg)) + + ;; Floating-point load, single-precision (32 bit). + (FpuLoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point store, single-precision (32 bit). + (FpuStore32 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point load, double-precision (64 bit). + (FpuLoad64 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point store, double-precision (64 bit). + (FpuStore64 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point/vector load, 128 bit. + (FpuLoad128 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point/vector store, 128 bit. + (FpuStore128 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; A load of a pair of floating-point registers, double precision (64-bit). + (FpuLoadP64 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) + + ;; A store of a pair of floating-point registers, double precision (64-bit). + (FpuStoreP64 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) + + ;; A load of a pair of floating-point registers, 128-bit. + (FpuLoadP128 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) + + ;; A store of a pair of floating-point registers, 128-bit. 
+ (FpuStoreP128 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) + + (LoadFpuConst64 + (rd WritableReg) + (const_data u64)) + + (LoadFpuConst128 + (rd WritableReg) + (const_data u128)) + + ;; Conversion: FP -> integer. + (FpuToInt + (op FpuToIntOp) + (rd WritableReg) + (rn Reg)) + + ;; Conversion: integer -> FP. + (IntToFpu + (op IntToFpuOp) + (rd WritableReg) + (rn Reg)) + + ;; FP conditional select, 32 bit. + (FpuCSel32 + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; FP conditional select, 64 bit. + (FpuCSel64 + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; Round to integer. + (FpuRound + (op FpuRoundMode) + (rd WritableReg) + (rn Reg)) + + ;; Move from a GPR to a vector register. The scalar value is parked in the lowest lane + ;; of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit + ;; transactions are supported. + (MovToFpu + (rd WritableReg) + (rn Reg) + (size ScalarSize)) + + ;; Loads a floating-point immediate. + (FpuMoveFPImm + (rd WritableReg) + (imm ASIMDFPModImm) + (size ScalarSize)) + + ;; Move to a vector element from a GPR. + (MovToVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Unsigned move from a vector element to a GPR. + (MovFromVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Signed move from a vector element to a GPR. + (MovFromVecSigned + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize) + (scalar_size OperandSize)) + + ;; Duplicate general-purpose register to vector. + (VecDup + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Duplicate scalar to vector. + (VecDupFromFpu + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Duplicate FP immediate to vector. + (VecDupFPImm + (rd WritableReg) + (imm ASIMDFPModImm) + (size VectorSize)) + + ;; Duplicate immediate to vector. + (VecDupImm + (rd WritableReg) + (imm ASIMDMovModImm) + (invert bool) + (size VectorSize)) + + ;; Vector extend. + (VecExtend + (t VecExtendOp) + (rd WritableReg) + (rn Reg) + (high_half bool)) + + ;; Move vector element to another vector element. + (VecMovElement + (rd WritableReg) + (rn Reg) + (dest_idx u8) + (src_idx u8) + (size VectorSize)) + + ;; Vector widening operation. + (VecRRLong + (op VecRRLongOp) + (rd WritableReg) + (rn Reg) + (high_half bool)) + + ;; Vector narrowing operation. + (VecRRNarrow + (op VecRRNarrowOp) + (rd WritableReg) + (rn Reg) + (high_half bool)) + + ;; 1-operand vector instruction that operates on a pair of elements. + (VecRRPair + (op VecPairOp) + (rd WritableReg) + (rn Reg)) + + ;; 2-operand vector instruction that produces a result with twice the + ;; lane width and half the number of lanes. + (VecRRRLong + (alu_op VecRRRLongOp) + (rd WritableReg) + (rn Reg) + (rm Reg) + (high_half bool)) + + ;; 1-operand vector instruction that extends elements of the input + ;; register and operates on a pair of elements. The output lane width + ;; is double that of the input. + (VecRRPairLong + (op VecRRPairLongOp) + (rd WritableReg) + (rn Reg)) + + ;; A vector ALU op. + (VecRRR + (alu_op VecALUOp) + (rd WritableReg) + (rn Reg) + (rm Reg) + (size VectorSize)) + + ;; Vector two register miscellaneous instruction. + (VecMisc + (op VecMisc2) + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector instruction across lanes. + (VecLanes + (op VecLanesOp) + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector shift by immediate Shift Left (immediate), Unsigned Shift Right (immediate) + ;; Signed Shift Right (immediate). 
These are somewhat unusual in that, for right shifts, + ;; the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero + ;; right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm` + ;; values from 0 to lane-size-in-bits - 1 inclusive. + (VecShiftImm + (op VecShiftImmOp) + (rd WritableReg) + (rn Reg) + (size VectorSize) + (imm u8)) + + ;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes + ;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`. + (VecExtract + (rd WritableReg) + (rn Reg) + (rm Reg) + (imm4 u8)) + + ;; Table vector lookup - single register table. The table consists of 8-bit elements and is + ;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether + ;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination + ;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them + ;; to 0. + (VecTbl + (rd WritableReg) + (rn Reg) + (rm Reg) + (is_extension bool)) + + ;; Table vector lookup - two register table. The table consists of 8-bit elements and is + ;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension` + ;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in + ;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified + ;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers + ;; modulo 32, that is v31 and v0 (in that order) are consecutive registers. + (VecTbl2 + (rd WritableReg) + (rn Reg) + (rn2 Reg) + (rm Reg) + (is_extension bool)) + + ;; Load an element and replicate to all lanes of a vector. + (VecLoadReplicate + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn + ;; control-flow diamond. + (VecCSel + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). + (MovToNZCV + (rn Reg)) + + ;; Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). + (MovFromNZCV + (rd WritableReg)) + + ;; A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation + ;; of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the + ;; code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit + ;; target. + (Call + (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd + (info BoxCallIndInfo)) + + ;; ---- branches (exactly one must appear at end of BB) ---- + + ;; A machine return instruction. + (Ret + (rets VecReg)) + + ;; A placeholder instruction, generating no code, meaning that a function epilogue must be + ;; inserted there. + (EpiloguePlaceholder) + + ;; An unconditional branch. + (Jump + (dest BranchTarget)) + + ;; A conditional branch. Contains two targets; at emission time, both are emitted, but + ;; the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the + ;; choice of taken/not_taken (inverting the branch polarity as needed) based on the + ;; fallthrough at the time of lowering. + (CondBr + (taken BranchTarget) + (not_taken BranchTarget) + (kind CondBrKind)) + + ;; A conditional trap: execute a `udf` if the condition is true. 
This is + ;; one VCode instruction because it uses embedded control flow; it is + ;; logically a single-in, single-out region, but needs to appear as one + ;; unit to the register allocator. + ;; + ;; The `CondBrKind` gives the conditional-branch condition that will + ;; *execute* the embedded `Inst`. (In the emitted code, we use the inverse + ;; of this condition in a branch that skips the trap instruction.) + (TrapIf + (kind CondBrKind) + (trap_code TrapCode)) + + ;; An indirect branch through a register, augmented with set of all + ;; possible successors. + (IndirectBr + (rn Reg) + (targets VecMachLabel)) + + ;; A "break" instruction, used for e.g. traps and debug breakpoints. + (Brk) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + + ;; Compute the address (using a PC-relative offset) of a memory location, using the `ADR` + ;; instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is + ;; only used for now in fixed lowering sequences with hardcoded offsets. In the future we may + ;; need full `MemLabel` support. + (Adr + (rd WritableReg) + ;; Offset in range -2^20 .. 2^20. + (off i32)) + + ;; Raw 32-bit word, used for inline constants and jump-table entries. + (Word4 + (data u32)) + + ;; Raw 64-bit word, used for inline constants. + (Word8 + (data u64)) + + ;; Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale). + (JTSequence + (info BoxJTSequenceInfo) + (ridx Reg) + (rtmp1 WritableReg) + (rtmp2 WritableReg)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load address referenced by `mem` into `rd`. + (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This + ;; controls how AMode::NominalSPOffset args are lowered. + (VirtualSPOffsetAdj + (offset i64)) + + ;; Meta-insn, no-op in generated code: emit constant/branch veneer island + ;; at this point (with a guard jump around it) if less than the needed + ;; space is available before the next branch deadline. See the `MachBuffer` + ;; implementation in `machinst/buffer.rs` for the overall algorithm. In + ;; brief, we retain a set of "pending/unresolved label references" from + ;; branches as we scan forward through instructions to emit machine code; + ;; if we notice we're about to go out of range on an unresolved reference, + ;; we stop, emit a bunch of "veneers" (branches in a form that has a longer + ;; range, e.g. a 26-bit-offset unconditional jump), and point the original + ;; label references to those. This is an "island" because it comes in the + ;; middle of the code. + ;; + ;; This meta-instruction is a necessary part of the logic that determines + ;; where to place islands. Ordinarily, we want to place them between basic + ;; blocks, so we compute the worst-case size of each block, and emit the + ;; island before starting a block if we would exceed a deadline before the + ;; end of the block. However, some sequences (such as an inline jumptable) + ;; are variable-length and not accounted for by this logic; so these + ;; lowered sequences include an `EmitIsland` to trigger island generation + ;; where necessary. + (EmitIsland + ;; The needed space before the next deadline. + (needed_space CodeOffset)) + + ;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol in x0. 
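+ ;; (Illustrative aside, not upstream text: like `JTSequence` above, this is
+ ;; a pseudo-instruction -- the ISLE variant only declares the operands, and
+ ;; the actual call sequence is produced at emission time in `emit.rs`.)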
+ (ElfTlsGetAddr + (symbol ExternalName)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)))) ;; An ALU operation. This can be paired with several instruction formats ;; below (see `Inst`) in any combination. diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 1a836f1a71..2a25c6702a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -6,10 +6,7 @@ use crate::ir::types::*; use crate::ir::Type; use crate::isa::aarch64::inst::*; -use crate::machinst::{ty_bits, MachLabel}; - -use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable}; - +use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg, Writable}; use core::convert::Into; use std::string::String; @@ -222,6 +219,29 @@ impl AMode { _ => None, } } + + pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + // This should match `memarg_operands()`. + match self { + &AMode::Unscaled(reg, imm9) => AMode::Unscaled(allocs.next(reg), imm9), + &AMode::UnsignedOffset(r, uimm12) => AMode::UnsignedOffset(allocs.next(r), uimm12), + &AMode::RegReg(r1, r2) => AMode::RegReg(allocs.next(r1), allocs.next(r2)), + &AMode::RegScaled(r1, r2, ty) => AMode::RegScaled(allocs.next(r1), allocs.next(r2), ty), + &AMode::RegScaledExtended(r1, r2, ty, ext) => { + AMode::RegScaledExtended(allocs.next(r1), allocs.next(r2), ty, ext) + } + &AMode::RegExtended(r1, r2, ext) => { + AMode::RegExtended(allocs.next(r1), allocs.next(r2), ext) + } + &AMode::PreIndexed(reg, simm9) => AMode::PreIndexed(allocs.next_writable(reg), simm9), + &AMode::PostIndexed(reg, simm9) => AMode::PostIndexed(allocs.next_writable(reg), simm9), + &AMode::RegOffset(r, off, ty) => AMode::RegOffset(allocs.next(r), off, ty), + &AMode::FPOffset(..) + | &AMode::SPOffset(..) + | &AMode::NominalSPOffset(..) + | AMode::Label(..) => self.clone(), + } + } } /// A memory argument to a load/store-pair. @@ -232,6 +252,23 @@ pub enum PairAMode { PostIndexed(Writable, SImm7Scaled), } +impl PairAMode { + pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + // Should match `pairmemarg_operands()`. 
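+        // (Illustrative note on the invariant, using only names from this
+        // patch: `pairmemarg_operands()` collects registers with
+        // `reg_use`/`reg_mod`, and regalloc2 hands back one `Allocation` per
+        // collected operand, in order. `with_allocs()` must therefore call
+        // `allocs.next(..)` in exactly the same order, or every later operand
+        // of the instruction would be rewritten to the wrong register.)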
+ match self { + &PairAMode::SignedOffset(reg, simm7scaled) => { + PairAMode::SignedOffset(allocs.next(reg), simm7scaled) + } + &PairAMode::PreIndexed(reg, simm7scaled) => { + PairAMode::PreIndexed(allocs.next_writable(reg), simm7scaled) + } + &PairAMode::PostIndexed(reg, simm7scaled) => { + PairAMode::PostIndexed(allocs.next_writable(reg), simm7scaled) + } + } + } +} + //============================================================================= // Instruction sub-components (conditions, branches and branch targets): // definitions @@ -362,19 +399,19 @@ impl BranchTarget { } impl PrettyPrint for ShiftOpAndAmt { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("{:?} {}", self.op(), self.amt().value()) } } impl PrettyPrint for ExtendOp { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("{:?}", self) } } impl PrettyPrint for MemLabel { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { match self { &MemLabel::PCRel(off) => format!("pc+{}", off), } @@ -393,33 +430,36 @@ fn shift_for_type(ty: Type) -> usize { } impl PrettyPrint for AMode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { &AMode::Unscaled(reg, simm9) => { + let reg = pretty_print_reg(reg, allocs); if simm9.value != 0 { - format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru)) + let simm9 = simm9.pretty_print(8, allocs); + format!("[{}, {}]", reg, simm9) } else { - format!("[{}]", reg.show_rru(mb_rru)) + format!("[{}]", reg) } } &AMode::UnsignedOffset(reg, uimm12) => { + let reg = pretty_print_reg(reg, allocs); if uimm12.value != 0 { - format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru)) + let uimm12 = uimm12.pretty_print(8, allocs); + format!("[{}, {}]", reg, uimm12) } else { - format!("[{}]", reg.show_rru(mb_rru)) + format!("[{}]", reg) } } &AMode::RegReg(r1, r2) => { - format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),) + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_reg(r2, allocs); + format!("[{}, {}]", r1, r2) } &AMode::RegScaled(r1, r2, ty) => { + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_reg(r2, allocs); let shift = shift_for_type(ty); - format!( - "[{}, {}, LSL #{}]", - r1.show_rru(mb_rru), - r2.show_rru(mb_rru), - shift, - ) + format!("[{}, {}, LSL #{}]", r1, r2, shift) } &AMode::RegScaledExtended(r1, r2, ty, op) => { let shift = shift_for_type(ty); @@ -427,39 +467,32 @@ impl PrettyPrint for AMode { ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, _ => OperandSize::Size64, }; - let op = op.show_rru(mb_rru); - format!( - "[{}, {}, {} #{}]", - r1.show_rru(mb_rru), - show_ireg_sized(r2, mb_rru, size), - op, - shift - ) + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_ireg(r2, size, allocs); + let op = op.pretty_print(0, allocs); + format!("[{}, {}, {} #{}]", r1, r2, op, shift) } &AMode::RegExtended(r1, r2, op) => { let size = match op { ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, _ => OperandSize::Size64, }; - let op = op.show_rru(mb_rru); - format!( - "[{}, {}, {}]", - r1.show_rru(mb_rru), - show_ireg_sized(r2, mb_rru, size), - op, - ) + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_ireg(r2, 
size, allocs); + let op = op.pretty_print(0, allocs); + format!("[{}, {}, {}]", r1, r2, op) + } + &AMode::Label(ref label) => label.pretty_print(0, allocs), + &AMode::PreIndexed(r, simm9) => { + let r = pretty_print_reg(r.to_reg(), allocs); + let simm9 = simm9.pretty_print(8, allocs); + format!("[{}, {}]!", r, simm9) + } + &AMode::PostIndexed(r, simm9) => { + let r = pretty_print_reg(r.to_reg(), allocs); + let simm9 = simm9.pretty_print(8, allocs); + format!("[{}], {}", r, simm9) } - &AMode::Label(ref label) => label.show_rru(mb_rru), - &AMode::PreIndexed(r, simm9) => format!( - "[{}, {}]!", - r.to_reg().show_rru(mb_rru), - simm9.show_rru(mb_rru) - ), - &AMode::PostIndexed(r, simm9) => format!( - "[{}], {}", - r.to_reg().show_rru(mb_rru), - simm9.show_rru(mb_rru) - ), // Eliminated by `mem_finalize()`. &AMode::SPOffset(..) | &AMode::FPOffset(..) @@ -472,31 +505,33 @@ impl PrettyPrint for AMode { } impl PrettyPrint for PairAMode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { &PairAMode::SignedOffset(reg, simm7) => { + let reg = pretty_print_reg(reg, allocs); if simm7.value != 0 { - format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru)) + let simm7 = simm7.pretty_print(8, allocs); + format!("[{}, {}]", reg, simm7) } else { - format!("[{}]", reg.show_rru(mb_rru)) + format!("[{}]", reg) } } - &PairAMode::PreIndexed(reg, simm7) => format!( - "[{}, {}]!", - reg.to_reg().show_rru(mb_rru), - simm7.show_rru(mb_rru) - ), - &PairAMode::PostIndexed(reg, simm7) => format!( - "[{}], {}", - reg.to_reg().show_rru(mb_rru), - simm7.show_rru(mb_rru) - ), + &PairAMode::PreIndexed(reg, simm7) => { + let reg = pretty_print_reg(reg.to_reg(), allocs); + let simm7 = simm7.pretty_print(8, allocs); + format!("[{}, {}]!", reg, simm7) + } + &PairAMode::PostIndexed(reg, simm7) => { + let reg = pretty_print_reg(reg.to_reg(), allocs); + let simm7 = simm7.pretty_print(8, allocs); + format!("[{}], {}", reg, simm7) + } } } } impl PrettyPrint for Cond { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { let mut s = format!("{:?}", self); s.make_ascii_lowercase(); s @@ -504,7 +539,7 @@ impl PrettyPrint for Cond { } impl PrettyPrint for BranchTarget { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { match self { &BranchTarget::Label(label) => format!("label{:?}", label.get()), &BranchTarget::ResolvedOffset(off) => format!("{}", off), diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 560a5b5d77..d354e00b75 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1,14 +1,13 @@ //! AArch64 ISA: binary code emission. 
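+//
+// (Illustrative overview added for exposition, not upstream text: with
+// regalloc2, `Inst::emit` receives this instruction's `Allocation` slice and
+// wraps it in an `AllocationConsumer`; each operand is then rewritten to its
+// assigned register in the same order `aarch64_get_operands` collected it,
+// e.g.
+//
+//     let mut allocs = AllocationConsumer::new(allocs);
+//     let rd = allocs.next_writable(rd); // def
+//     let rn = allocs.next(rn);          // use
+//
+// Helper instructions built internally from real registers are emitted with
+// an empty slice, `inst.emit(&[], sink, emit_info, state)`, since they carry
+// no operands left to rewrite.)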
+use regalloc2::Allocation; + use crate::binemit::{CodeOffset, Reloc, StackMap}; use crate::ir::constant::ConstantData; use crate::ir::types::*; use crate::ir::{LibCall, MemFlags, TrapCode}; use crate::isa::aarch64::inst::*; -use crate::machinst::ty_bits; - -use regalloc::{Reg, RegClass, Writable}; - +use crate::machinst::{ty_bits, Reg, RegClass, Writable}; use core::convert::TryFrom; /// Memory label/reference finalization: convert a MemLabel to a PC-relative @@ -98,17 +97,17 @@ pub fn u64_constant(bits: u64) -> ConstantData { // Instructions and subcomponents: emission fn machreg_to_gpr(m: Reg) -> u32 { - assert_eq!(m.get_class(), RegClass::I64); - u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() + assert_eq!(m.class(), RegClass::Int); + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() } fn machreg_to_vec(m: Reg) -> u32 { - assert_eq!(m.get_class(), RegClass::V128); - u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() + assert_eq!(m.class(), RegClass::Float); + u32::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() } fn machreg_to_gpr_or_vec(m: Reg) -> u32 { - u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() } pub(crate) fn enc_arith_rrr( @@ -168,10 +167,20 @@ fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 { (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond } -fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 { +fn enc_conditional_br( + taken: BranchTarget, + kind: CondBrKind, + allocs: &mut AllocationConsumer<'_>, +) -> u32 { match kind { - CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg), - CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg), + CondBrKind::Zero(reg) => { + let reg = allocs.next(reg); + enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg) + } + CondBrKind::NotZero(reg) => { + let reg = allocs.next(reg); + enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg) + } CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()), } } @@ -674,7 +683,15 @@ impl MachInstEmit for Inst { type State = EmitState; type Info = EmitInfo; - fn emit(&self, sink: &mut MachBuffer, emit_info: &Self::Info, state: &mut EmitState) { + fn emit( + &self, + allocs: &[Allocation], + sink: &mut MachBuffer, + emit_info: &Self::Info, + state: &mut EmitState, + ) { + let mut allocs = AllocationConsumer::new(allocs); + // N.B.: we *must* not exceed the "worst-case size" used to compute // where to insert islands, except when islands are explicitly triggered // (with an `EmitIsland`). We check this in debug builds. 
This is `mut` @@ -690,6 +707,10 @@ impl MachInstEmit for Inst { rn, rm, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + debug_assert!(match alu_op { ALUOp::SDiv | ALUOp::UDiv | ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64, @@ -742,6 +763,11 @@ impl MachInstEmit for Inst { rn, ra, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let ra = allocs.next(ra); + let (top11, bit15) = match alu_op { ALUOp3::MAdd32 => (0b0_00_11011_000, 0), ALUOp3::MSub32 => (0b0_00_11011_000, 1), @@ -757,6 +783,8 @@ impl MachInstEmit for Inst { rn, ref imm12, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top8 = match alu_op { ALUOp::Add => 0b000_10001, ALUOp::Sub => 0b010_10001, @@ -780,6 +808,8 @@ impl MachInstEmit for Inst { rn, ref imml, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (top9, inv) = match alu_op { ALUOp::Orr => (0b001_100100, false), ALUOp::And => (0b000_100100, false), @@ -802,6 +832,8 @@ impl MachInstEmit for Inst { rn, ref immshift, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let amt = immshift.value(); let (top10, immr, imms) = match alu_op { ALUOp::RotR => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)), @@ -839,6 +871,9 @@ impl MachInstEmit for Inst { rm, ref shiftop, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let top11: u32 = match alu_op { ALUOp::Add => 0b000_01011000, ALUOp::AddS => 0b001_01011000, @@ -867,6 +902,9 @@ impl MachInstEmit for Inst { rm, extendop, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let top11: u32 = match alu_op { ALUOp::Add => 0b00001011001, ALUOp::Sub => 0b01001011001, @@ -882,6 +920,8 @@ impl MachInstEmit for Inst { &Inst::BitRR { op, size, rd, rn, .. } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (op1, op2) = match op { BitOp::RBit => (0b00000, 0b000000), BitOp::Clz => (0b00000, 0b000100), @@ -902,10 +942,12 @@ impl MachInstEmit for Inst { | &Inst::FpuLoad32 { rd, ref mem, flags } | &Inst::FpuLoad64 { rd, ref mem, flags } | &Inst::FpuLoad128 { rd, ref mem, flags } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + let rd = allocs.next_writable(rd); + let mem = mem.with_allocs(&mut allocs); + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); } // ldst encoding helpers take Reg, not Writable. @@ -936,20 +978,26 @@ impl MachInstEmit for Inst { match &mem { &AMode::Unscaled(reg, simm9) => { + let reg = allocs.next(reg); sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &AMode::UnsignedOffset(reg, uimm12scaled) => { + let reg = allocs.next(reg); if uimm12scaled.value() != 0 { assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &AMode::RegReg(r1, r2) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, )); } &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); assert_eq!(bits, ty_bits(ty)); let extendop = match &mem { &AMode::RegScaled(..) 
=> None, @@ -961,6 +1009,8 @@ impl MachInstEmit for Inst { )); } &AMode::RegExtended(r1, r2, extendop) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, @@ -999,10 +1049,12 @@ impl MachInstEmit for Inst { } } &AMode::PreIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); } &AMode::PostIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); } // Eliminated by `mem_finalize()` above. &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { @@ -1019,10 +1071,12 @@ impl MachInstEmit for Inst { | &Inst::FpuStore32 { rd, ref mem, flags } | &Inst::FpuStore64 { rd, ref mem, flags } | &Inst::FpuStore128 { rd, ref mem, flags } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + let rd = allocs.next(rd); + let mem = mem.with_allocs(&mut allocs); + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); } let (op, bits) = match self { @@ -1044,20 +1098,26 @@ impl MachInstEmit for Inst { match &mem { &AMode::Unscaled(reg, simm9) => { + let reg = allocs.next(reg); sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &AMode::UnsignedOffset(reg, uimm12scaled) => { + let reg = allocs.next(reg); if uimm12scaled.value() != 0 { assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &AMode::RegReg(r1, r2) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, )); } &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); let extendop = match &mem { &AMode::RegScaled(..) => None, &AMode::RegScaledExtended(_, _, _, op) => Some(op), @@ -1068,6 +1128,8 @@ impl MachInstEmit for Inst { )); } &AMode::RegExtended(r1, r2, extendop) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, @@ -1081,10 +1143,12 @@ impl MachInstEmit for Inst { panic!("Store to a MemLabel not implemented!"); } &AMode::PreIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); } &AMode::PostIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); } // Eliminated by `mem_finalize()` above. &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { @@ -1100,23 +1164,29 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt); + let rt2 = allocs.next(rt2); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { // Register the offset at which the actual store instruction starts. 
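+                // (Illustrative note, not upstream text: the trap record maps
+                // a hardware fault at this exact code offset back to a wasm
+                // `HeapOutOfBounds` trap, which is why it is registered
+                // immediately before the `put4` that emits the store itself.)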
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); + let reg = allocs.next(reg); sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2)); } } } @@ -1126,26 +1196,30 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt.to_reg()); + let rt2 = allocs.next(rt2.to_reg()); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { // Register the offset at which the actual load instruction starts. sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } - let rt = rt.to_reg(); - let rt2 = rt2.to_reg(); - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); + let reg = allocs.next(reg); sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2)); } } } @@ -1161,6 +1235,9 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt.to_reg()); + let rt2 = allocs.next(rt2.to_reg()); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { @@ -1173,37 +1250,22 @@ impl MachInstEmit for Inst { &Inst::FpuLoadP128 { .. 
} => 0b10, _ => unreachable!(), }; - let rt = rt.to_reg(); - let rt2 = rt2.to_reg(); - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = allocs.next(reg); sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b11, - true, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b01, - true, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2)); } } } @@ -1219,6 +1281,9 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt); + let rt2 = allocs.next(rt2); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { @@ -1232,40 +1297,29 @@ impl MachInstEmit for Inst { _ => unreachable!(), }; - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = allocs.next(reg); sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b11, - false, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b01, - false, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2)); } } } &Inst::Mov64 { rd, rm } => { - assert!(rd.to_reg().get_class() == rm.get_class()); - assert!(rm.get_class() == RegClass::I64); + let rd = allocs.next_writable(rd); + let rm = allocs.next(rm); + assert!(rd.to_reg().class() == rm.class()); + assert!(rm.class() == RegClass::Int); // MOV to SP is interpreted as MOV to XZR instead. And our codegen // should never MOV to XZR. @@ -1287,6 +1341,8 @@ impl MachInstEmit for Inst { } } &Inst::Mov32 { rd, rm } => { + let rd = allocs.next_writable(rd); + let rm = allocs.next(rm); // MOV to SP is interpreted as MOV to XZR instead. And our codegen // should never MOV to XZR. 
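+                // (Illustrative note, not upstream text: the mov is encoded
+                // as `ORR rd, xzr, rm`, and in that encoding register number
+                // 31 names XZR, not SP -- so a destination allocated to SP
+                // would silently discard the value. Moves to or from SP must
+                // use the `ADD rd, rn, #0` alias instead; the assert below
+                // enforces this.)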
assert!(machreg_to_gpr(rd.to_reg()) != 31); @@ -1294,21 +1350,29 @@ impl MachInstEmit for Inst { sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); } &Inst::MovZ { rd, imm, size } => { + let rd = allocs.next_writable(rd); sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size)) } &Inst::MovN { rd, imm, size } => { + let rd = allocs.next_writable(rd); sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size)) } &Inst::MovK { rd, imm, size } => { + let rd = allocs.next_writable(rd); sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size)) } &Inst::CSel { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_csel(rd, rn, rm, cond)); } &Inst::CSet { rd, cond } => { + let rd = allocs.next_writable(rd); sink.put4(enc_cset(rd, cond)); } &Inst::CSetm { rd, cond } => { + let rd = allocs.next_writable(rd); sink.put4(enc_csetm(rd, cond)); } &Inst::CCmpImm { @@ -1318,9 +1382,13 @@ impl MachInstEmit for Inst { nzcv, cond, } => { + let rn = allocs.next(rn); sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); } &Inst::AtomicRMW { ty, op, rs, rt, rn } => { + let rs = allocs.next(rs); + let rt = allocs.next_writable(rt); + let rn = allocs.next(rn); sink.put4(enc_ldal(ty, op, rs, rt, rn)); } &Inst::AtomicRMWLoop { ty, op } => { @@ -1375,7 +1443,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); Inst::AluRRR { alu_op: ALUOp::OrrNot, @@ -1384,7 +1452,7 @@ impl MachInstEmit for Inst { rn: xzr, rm: x28, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } AtomicRmwOp::Umin | AtomicRmwOp::Umax @@ -1408,7 +1476,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); Inst::CSel { cond, @@ -1416,7 +1484,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } _ => { // add/sub/and/orr/eor x28, x27, x26 @@ -1441,7 +1509,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } } @@ -1462,10 +1530,14 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(again_label), CondBrKind::NotZero(x24), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); } &Inst::AtomicCAS { rs, rt, rn, ty } => { + let rs = allocs.next_writable(rs); + let rt = allocs.next(rt); + let rn = allocs.next(rn); let size = match ty { I8 => 0b00, I16 => 0b01, @@ -1526,6 +1598,7 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(out_label), CondBrKind::Cond(Cond::Ne), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19); @@ -1542,6 +1615,7 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(again_label), CondBrKind::NotZero(x24), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19); @@ -1549,21 +1623,31 @@ impl MachInstEmit for Inst { sink.bind_label(out_label); } &Inst::LoadAcquire { access_ty, rt, rn } => { + let rn = allocs.next(rn); + let rt = allocs.next_writable(rt); sink.put4(enc_ldar(access_ty, rt, rn)); } &Inst::StoreRelease { access_ty, rt, rn } => { + let rn = allocs.next(rn); + let rt = allocs.next(rt); sink.put4(enc_stlr(access_ty, rt, rn)); } &Inst::Fence {} => { 
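+                // (Illustrative note, not upstream text: `Fence` lowers to a
+                // single `dmb ish`, a data memory barrier over the inner-
+                // shareable domain that orders both loads and stores. Its
+                // fixed encoding is 0xD5033BBF; see the `"BF3B03D5"` byte
+                // string in the emit tests below.)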
sink.put4(enc_dmb_ish()); // dmb ish } &Inst::FpuMove64 { rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn)); } &Inst::FpuMove128 { rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift, mask) = match size.lane_size() { ScalarSize::Size32 => (0b00100, 3, 0b011), ScalarSize::Size64 => (0b01000, 4, 0b001), @@ -1579,6 +1663,8 @@ impl MachInstEmit for Inst { ); } &Inst::FpuExtend { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); sink.put4(enc_fpurr( 0b000_11110_00_1_000000_10000 | (size.ftype() << 13), rd, @@ -1586,6 +1672,8 @@ impl MachInstEmit for Inst { )); } &Inst::FpuRR { fpu_op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top22 = match fpu_op { FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000, FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000, @@ -1599,6 +1687,9 @@ impl MachInstEmit for Inst { sink.put4(enc_fpurr(top22, rd, rn)); } &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let top22 = match fpu_op { FPUOp2::Add32 => 0b000_11110_00_1_00000_001010, FPUOp2::Add64 => 0b000_11110_01_1_00000_001010, @@ -1619,44 +1710,48 @@ impl MachInstEmit for Inst { }; sink.put4(enc_fpurrr(top22, rd, rn, rm)); } - &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op { - FPUOpRI::UShr32(imm) => { - debug_assert_eq!(32, imm.lane_size_in_bits); - sink.put4( - 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) + &Inst::FpuRRI { fpu_op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + match fpu_op { + FPUOpRI::UShr32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::UShr64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::Sli64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::Sli32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } } - FPUOpRI::UShr64(imm) => { - debug_assert_eq!(64, imm.lane_size_in_bits); - sink.put4( - 0b01_1_111110_0000000_00_0_0_0_1_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) - } - FPUOpRI::Sli64(imm) => { - debug_assert_eq!(64, imm.lane_size_in_bits); - sink.put4( - 0b01_1_111110_0000000_010101_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) - } - FPUOpRI::Sli32(imm) => { - debug_assert_eq!(32, imm.lane_size_in_bits); - sink.put4( - 0b0_0_1_011110_0000000_010101_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) - } - }, + } &Inst::FpuRRRR { fpu_op, 
rd, @@ -1664,6 +1759,10 @@ impl MachInstEmit for Inst { rm, ra, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let ra = allocs.next(ra); let top17 = match fpu_op { FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0, FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0, @@ -1671,6 +1770,8 @@ impl MachInstEmit for Inst { sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); } &Inst::VecMisc { op, rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, enc_size) = size.enc_size(); let (u, bits_12_16, size) = match op { VecMisc2::Not => (0b1, 0b00101, 0b00), @@ -1813,6 +1914,8 @@ impl MachInstEmit for Inst { sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); } &Inst::VecLanes { op, rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, size) = match size { VectorSize::Size8x8 => (0b0, 0b00), VectorSize::Size8x16 => (0b1, 0b00), @@ -1834,6 +1937,8 @@ impl MachInstEmit for Inst { size, imm, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (is_shr, template) = match op { VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32), VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32), @@ -1869,6 +1974,9 @@ impl MachInstEmit for Inst { sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc); } &Inst::VecExtract { rd, rn, rm, imm4 } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); if imm4 < 16 { let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32; let rm_enc = machreg_to_vec(rm); @@ -1890,6 +1998,9 @@ impl MachInstEmit for Inst { rm, is_extension, } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm)); } &Inst::VecTbl2 { @@ -1899,16 +2010,26 @@ impl MachInstEmit for Inst { rm, is_extension, } => { + let rn = allocs.next(rn); + let rn2 = allocs.next(rn2); + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm)); } &Inst::FpuCmp32 { rn, rm } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm)); } &Inst::FpuCmp64 { rn, rm } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm)); } &Inst::FpuToInt { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top16 = match op { // FCVTZS (32/32-bit) FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000, @@ -1930,6 +2051,8 @@ impl MachInstEmit for Inst { sink.put4(enc_fputoint(top16, rd, rn)); } &Inst::IntToFpu { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top16 = match op { // SCVTF (32/32-bit) IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010, @@ -1951,41 +2074,51 @@ impl MachInstEmit for Inst { sink.put4(enc_inttofpu(top16, rd, rn)); } &Inst::LoadFpuConst64 { rd, const_data } => { + let rd = allocs.next_writable(rd); let inst = Inst::FpuLoad64 { rd, mem: AMode::Label(MemLabel::PCRel(8)), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(12), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); sink.put8(const_data); } &Inst::LoadFpuConst128 { rd, const_data } => { + let 
rd = allocs.next_writable(rd); let inst = Inst::FpuLoad128 { rd, mem: AMode::Label(MemLabel::PCRel(8)), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(20), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); for i in const_data.to_le_bytes().iter() { sink.put1(*i); } } &Inst::FpuCSel32 { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32)); } &Inst::FpuCSel64 { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64)); } &Inst::FpuRound { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top22 = match op { FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000, FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000, @@ -1999,6 +2132,8 @@ impl MachInstEmit for Inst { sink.put4(enc_fround(top22, rd, rn)); } &Inst::MovToFpu { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let template = match size { ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000, ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000, @@ -2007,6 +2142,7 @@ impl MachInstEmit for Inst { sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())); } &Inst::FpuMoveFPImm { rd, imm, size } => { + let rd = allocs.next_writable(rd); let size_code = match size { ScalarSize::Size32 => 0b00, ScalarSize::Size64 => 0b01, @@ -2020,6 +2156,8 @@ impl MachInstEmit for Inst { ); } &Inst::MovToVec { rd, rn, idx, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift) = match size.lane_size() { ScalarSize::Size8 => (0b00001, 1), ScalarSize::Size16 => (0b00010, 2), @@ -2037,6 +2175,8 @@ impl MachInstEmit for Inst { ); } &Inst::MovFromVec { rd, rn, idx, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, imm5, shift, mask) = match size { VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111), VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111), @@ -2061,6 +2201,8 @@ impl MachInstEmit for Inst { size, scalar_size, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift, half) = match size { VectorSize::Size8x8 => (0b00001, 1, true), VectorSize::Size8x16 => (0b00001, 1, false), @@ -2087,6 +2229,8 @@ impl MachInstEmit for Inst { ); } &Inst::VecDup { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let imm5 = match size { VectorSize::Size8x16 => 0b00001, VectorSize::Size16x8 => 0b00010, @@ -2102,6 +2246,8 @@ impl MachInstEmit for Inst { ); } &Inst::VecDupFromFpu { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let imm5 = match size { VectorSize::Size32x4 => 0b00100, VectorSize::Size64x2 => 0b01000, @@ -2115,6 +2261,7 @@ impl MachInstEmit for Inst { ); } &Inst::VecDupFPImm { rd, imm, size } => { + let rd = allocs.next_writable(rd); let imm = imm.enc_bits(); let op = match size.lane_size() { ScalarSize::Size32 => 0, @@ -2131,6 +2278,7 @@ impl MachInstEmit for Inst { invert, size, } => { + let rd = allocs.next_writable(rd); let (imm, shift, shift_ones) = imm.value(); let (op, cmode) = match size.lane_size() { ScalarSize::Size8 => { @@ -2178,6 +2326,8 @@ impl MachInstEmit for Inst 
{ rn, high_half, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (u, immh) = match t { VecExtendOp::Sxtl8 => (0b0, 0b001), VecExtendOp::Sxtl16 => (0b0, 0b010), @@ -2201,6 +2351,8 @@ impl MachInstEmit for Inst { rn, high_half, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (u, size, bits_12_16) = match op { VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111), VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111), @@ -2223,6 +2375,8 @@ impl MachInstEmit for Inst { rn, high_half, } => { + let rn = allocs.next(rn); + let rd = allocs.next_writable(rd); let (u, size, bits_12_16) = match op { VecRRNarrowOp::Xtn16 => (0b0, 0b00, 0b10010), VecRRNarrowOp::Xtn32 => (0b0, 0b01, 0b10010), @@ -2255,6 +2409,8 @@ impl MachInstEmit for Inst { src_idx, size, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift) = match size.lane_size() { ScalarSize::Size8 => (0b00001, 1), ScalarSize::Size16 => (0b00010, 2), @@ -2276,6 +2432,8 @@ impl MachInstEmit for Inst { ); } &Inst::VecRRPair { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let bits_12_16 = match op { VecPairOp::Addp => 0b11011, }; @@ -2289,6 +2447,9 @@ impl MachInstEmit for Inst { alu_op, high_half, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let (u, size, bit14) = match alu_op { VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1), VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1), @@ -2311,6 +2472,8 @@ impl MachInstEmit for Inst { )); } &Inst::VecRRPairLong { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (u, size) = match op { VecRRPairLongOp::Saddlp8 => (0b0, 0b0), VecRRPairLongOp::Uaddlp8 => (0b1, 0b0), @@ -2327,6 +2490,9 @@ impl MachInstEmit for Inst { alu_op, size, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let (q, enc_size) = size.enc_size(); let is_float = match alu_op { VecALUOp::Fcmeq @@ -2431,6 +2597,8 @@ impl MachInstEmit for Inst { sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); } &Inst::VecLoadReplicate { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, size) = size.enc_size(); let srcloc = state.cur_srcloc(); @@ -2442,6 +2610,9 @@ impl MachInstEmit for Inst { sink.put4(enc_ldst_vec(q, size, rn, rd)); } &Inst::VecCSel { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); /* Emit this: b.cond else mov rd, rm @@ -2460,6 +2631,7 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(else_label), CondBrKind::Cond(cond), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19); @@ -2482,9 +2654,11 @@ impl MachInstEmit for Inst { sink.bind_label(out_label); } &Inst::MovToNZCV { rn } => { + let rn = allocs.next(rn); sink.put4(0xd51b4200 | machreg_to_gpr(rn)); } &Inst::MovFromNZCV { rd } => { + let rd = allocs.next_writable(rd); sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); } &Inst::Extend { @@ -2494,6 +2668,8 @@ impl MachInstEmit for Inst { from_bits: 1, to_bits, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); assert!(to_bits <= 64); // Reduce zero-extend-from-1-bit to: // - and rd, rn, #1 @@ -2507,7 +2683,7 @@ impl MachInstEmit for Inst { rn, imml, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } &Inst::Extend { rd, @@ -2516,8 +2692,10 @@ impl 
MachInstEmit for Inst { from_bits: 32, to_bits: 64, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let mov = Inst::Mov32 { rd, rm: rn }; - mov.emit(sink, emit_info, state); + mov.emit(&[], sink, emit_info, state); } &Inst::Extend { rd, @@ -2526,6 +2704,8 @@ impl MachInstEmit for Inst { from_bits, to_bits, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (opc, size) = if signed { (0b00, OperandSize::from_bits(to_bits)) } else { @@ -2543,7 +2723,7 @@ impl MachInstEmit for Inst { // Emit the jump itself. sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero())); } - &Inst::Ret => { + &Inst::Ret { .. } => { sink.put4(0xd65f03c0); } &Inst::EpiloguePlaceholder => { @@ -2564,7 +2744,8 @@ impl MachInstEmit for Inst { if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } - sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5)); + let rn = allocs.next(info.rn); + sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)); let loc = state.cur_srcloc(); if info.opcode.is_call() { sink.add_call_site(loc, info.opcode); @@ -2579,10 +2760,12 @@ impl MachInstEmit for Inst { let cond_off = sink.cur_offset(); if let Some(l) = taken.as_label() { sink.use_label_at_offset(cond_off, l, LabelUse::Branch19); - let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes(); + let mut allocs_inv = allocs.clone(); + let inverted = + enc_conditional_br(taken, kind.invert(), &mut allocs_inv).to_le_bytes(); sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]); } - sink.put4(enc_conditional_br(taken, kind)); + sink.put4(enc_conditional_br(taken, kind, &mut allocs)); // Unconditional part next. let uncond_off = sink.cur_offset(); @@ -2599,15 +2782,17 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(label), kind.invert(), + &mut allocs, )); sink.use_label_at_offset(off, label, LabelUse::Branch19); // udf let trap = Inst::Udf { trap_code }; - trap.emit(sink, emit_info, state); + trap.emit(&[], sink, emit_info, state); // LABEL: sink.bind_label(label); } &Inst::IndirectBr { rn, .. } => { + let rn = allocs.next(rn); sink.put4(enc_br(rn)); } &Inst::Nop0 => {} @@ -2626,6 +2811,7 @@ impl MachInstEmit for Inst { sink.put4(0xd4a00000); } &Inst::Adr { rd, off } => { + let rd = allocs.next_writable(rd); assert!(off > -(1 << 20)); assert!(off < (1 << 20)); sink.put4(enc_adr(off, rd)); @@ -2643,12 +2829,19 @@ impl MachInstEmit for Inst { ref info, .. } => { + let ridx = allocs.next(ridx); + let rtmp1 = allocs.next_writable(rtmp1); + let rtmp2 = allocs.next_writable(rtmp2); // This sequence is *one* instruction in the vcode, and is expanded only here at // emission time, because we cannot allow the regalloc to insert spills/reloads in // the middle; we depend on hardcoded PC-rel addressing below. // Branch to default when condition code from prior comparison indicates. - let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs)); + let br = enc_conditional_br( + info.default_target, + CondBrKind::Cond(Cond::Hs), + &mut AllocationConsumer::default(), + ); // No need to inform the sink's branch folding logic about this branch, because it // will not be merged with any other branch, flipped, or elided (it is not preceded // or succeeded by any other branch). Just emit it with the label use. 
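+            // (Illustrative summary, not upstream text: the fixed expansion
+            // that follows is
+            //
+            //     b.hs  <default>           ; out-of-bounds index
+            //     mov   rtmp2, ridx
+            //     adr   rtmp1, <table>      ; hardcoded offset of 16
+            //     ldrsw rtmp2, [rtmp1, ...] ; 32-bit offset from the table
+            //     add   rtmp1, rtmp1, rtmp2
+            //     br    rtmp1
+            //     <table of 32-bit offsets>
+            //
+            // The hardcoded PC-relative `adr` is why regalloc must not be
+            // allowed to insert spills or reloads inside this sequence.)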
@@ -2661,10 +2854,10 @@ impl MachInstEmit for Inst { // Save index in a tmp (the live range of ridx only goes to start of this // sequence; rtmp1 or rtmp2 may overwrite it). let inst = Inst::gen_move(rtmp2, ridx, I64); - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Load address of jump table let inst = Inst::Adr { rd: rtmp1, off: 16 }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Load value out of jump table let inst = Inst::SLoad32 { rd: rtmp2, @@ -2676,7 +2869,7 @@ impl MachInstEmit for Inst { ), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Add base of jump table to jump-table-sourced block offset let inst = Inst::AluRRR { alu_op: ALUOp::Add, @@ -2685,14 +2878,14 @@ impl MachInstEmit for Inst { rn: rtmp1.to_reg(), rm: rtmp2.to_reg(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Branch to computed address. (`targets` here is only used for successor queries // and is not needed for emission.) let inst = Inst::IndirectBr { rn: rtmp1.to_reg(), targets: vec![], }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Emit jump table (table of 32-bit offsets). let jt_off = sink.cur_offset(); for &target in info.targets.iter() { @@ -2719,16 +2912,17 @@ impl MachInstEmit for Inst { ref name, offset, } => { + let rd = allocs.next_writable(rd); let inst = Inst::ULoad64 { rd, mem: AMode::Label(MemLabel::PCRel(8)), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(12), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let srcloc = state.cur_srcloc(); sink.add_reloc(srcloc, Reloc::Abs8, name, offset); if emit_info.0.emit_all_ones_funcaddrs() { @@ -2738,15 +2932,24 @@ impl MachInstEmit for Inst { } } &Inst::LoadAddr { rd, ref mem } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + let rd = allocs.next_writable(rd); + let mem = mem.with_allocs(&mut allocs); + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); } let (reg, index_reg, offset) = match mem { - AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0), - AMode::Unscaled(r, simm9) => (r, None, simm9.value()), + AMode::RegExtended(r, idx, extendop) => { + let r = allocs.next(r); + (r, Some((idx, extendop)), 0) + } + AMode::Unscaled(r, simm9) => { + let r = allocs.next(r); + (r, None, simm9.value()) + } AMode::UnsignedOffset(r, uimm12scaled) => { + let r = allocs.next(r); (r, None, uimm12scaled.value() as i32) } _ => panic!("Unsupported case for LoadAddr: {:?}", mem), @@ -2768,12 +2971,12 @@ impl MachInstEmit for Inst { extendop, }; - add.emit(sink, emit_info, state); + add.emit(&[], sink, emit_info, state); } else if offset == 0 { if reg != rd.to_reg() { let mov = Inst::Mov64 { rd, rm: reg }; - mov.emit(sink, emit_info, state); + mov.emit(&[], sink, emit_info, state); } } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { let add = Inst::AluRRImm12 { @@ -2783,7 +2986,7 @@ impl MachInstEmit for Inst { rn: reg, imm12, }; - add.emit(sink, emit_info, state); + add.emit(&[], sink, emit_info, state); } else { // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction // was initially an `SPOffset`. 
Assert that `tmp2` is truly free to use. Note @@ -2794,7 +2997,7 @@ impl MachInstEmit for Inst { debug_assert!(reg != tmp2_reg()); let tmp = writable_tmp2_reg(); for insn in Inst::load_constant(tmp, abs_offset).into_iter() { - insn.emit(sink, emit_info, state); + insn.emit(&[], sink, emit_info, state); } let add = Inst::AluRRR { alu_op, @@ -2803,7 +3006,7 @@ impl MachInstEmit for Inst { rn: reg, rm: tmp.to_reg(), }; - add.emit(sink, emit_info, state); + add.emit(&[], sink, emit_info, state); } } &Inst::VirtualSPOffsetAdj { offset } => { @@ -2820,7 +3023,7 @@ impl MachInstEmit for Inst { let jmp = Inst::Jump { dest: BranchTarget::Label(jump_around_label), }; - jmp.emit(sink, emit_info, state); + jmp.emit(&[], sink, emit_info, state); sink.emit_island(needed_space + 4); sink.bind_label(jump_around_label); } @@ -2851,13 +3054,11 @@ impl MachInstEmit for Inst { sink.put4(0xd503201f); } - &Inst::ValueLabelMarker { .. } => { - // Nothing; this is only used to compute debug info. - } - &Inst::Unwind { ref inst } => { sink.add_unwind(inst.clone()); } + + &Inst::DummyUse { .. } => {} } let end_off = sink.cur_offset(); @@ -2866,7 +3067,8 @@ impl MachInstEmit for Inst { state.clear_post_insn(); } - fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { - self.print_with_state(mb_rru, state) + fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { + let mut allocs = AllocationConsumer::new(allocs); + self.print_with_state(state, &mut allocs) } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 9cc917192c..918ca3ffaf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -27,7 +27,7 @@ fn test_aarch64_binemit() { // Then: // // $ echo "mov x1, x2" | aarch64inst.sh - insns.push((Inst::Ret, "C0035FD6", "ret")); + insns.push((Inst::Ret { rets: vec![] }, "C0035FD6", "ret")); insns.push((Inst::Nop0, "", "nop-zero-len")); insns.push((Inst::Nop4, "1F2003D5", "nop")); insns.push(( @@ -1631,7 +1631,7 @@ fn test_aarch64_binemit() { flags: MemFlags::trusted(), }, "E18040F8", - "ldur x1, [x7, #8]", + "ldr x1, [x7, #8]", )); insns.push(( @@ -6794,7 +6794,6 @@ fn test_aarch64_binemit() { insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish")); let flags = settings::Flags::new(settings::builder()); - let rru = create_reg_universe(&flags); let emit_info = EmitInfo::new(flags); for (insn, expected_encoding, expected_printing) in insns { println!( @@ -6803,11 +6802,12 @@ fn test_aarch64_binemit() { ); // Check the printed text is as expected. 
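+    // (Illustrative note, not upstream text: these unit tests run before any
+    // register allocation, so the `Allocation` slice is empty --
+    // `AllocationConsumer::new(&[])` for printing and `insn.emit(&[], ...)`
+    // for encoding -- and each instruction's registers are used exactly as
+    // written in the test.)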
- let actual_printing = insn.show_rru(Some(&rru)); + let actual_printing = + insn.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])); assert_eq!(expected_printing, actual_printing); let mut buffer = MachBuffer::new(); - insn.emit(&mut buffer, &emit_info, &mut Default::default()); + insn.emit(&[], &mut buffer, &emit_info, &mut Default::default()); let buffer = buffer.finish(); let actual_encoding = &buffer.stringify_code_bytes(); assert_eq!(expected_encoding, actual_encoding); diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index beed2f40de..c6a6fed300 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -5,8 +5,7 @@ use crate::ir::types::*; use crate::ir::Type; use crate::isa::aarch64::inst::{OperandSize, ScalarSize}; - -use regalloc::{PrettyPrint, RealRegUniverse}; +use crate::machinst::{AllocationConsumer, PrettyPrint}; use core::convert::TryFrom; use std::string::String; @@ -871,7 +870,7 @@ impl ASIMDFPModImm { } impl PrettyPrint for NZCV { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c }; format!( "#{}{}{}{}", @@ -884,13 +883,13 @@ impl PrettyPrint for NZCV { } impl PrettyPrint for UImm5 { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for Imm12 { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { let shift = if self.shift12 { 12 } else { 0 }; let value = u32::from(self.bits) << shift; format!("#{}", value) @@ -898,49 +897,49 @@ impl PrettyPrint for Imm12 { } impl PrettyPrint for SImm7Scaled { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for FPULeftShiftImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.amount) } } impl PrettyPrint for FPURightShiftImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.amount) } } impl PrettyPrint for SImm9 { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for UImm12Scaled { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for ImmLogic { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value()) } } impl PrettyPrint for ImmShift { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.imm) } } impl PrettyPrint for MoveWideConst { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut 
AllocationConsumer<'_>) -> String { if self.shift == 0 { format!("#{}", self.bits) } else { @@ -950,7 +949,7 @@ impl PrettyPrint for MoveWideConst { } impl PrettyPrint for ASIMDMovModImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { if self.is_64bit { debug_assert_eq!(self.shift, 0); @@ -974,7 +973,7 @@ impl PrettyPrint for ASIMDMovModImm { } impl PrettyPrint for ASIMDFPModImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { if self.is_64bit { format!("#{}", f64::from_bits(Self::value64(self.imm))) } else { diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index a0af36d6cb..18d70e527d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -7,16 +7,16 @@ use crate::binemit::{Addend, CodeOffset, Reloc}; use crate::ir::types::{ B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64, }; -use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel}; +use crate::ir::{types, ExternalName, MemFlags, Opcode, SourceLoc, Type}; use crate::isa::CallConv; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; -use regalloc::RegUsageCollector; -use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; +use crate::machinst::{PrettyPrint, Reg, RegClass, Writable}; use alloc::vec::Vec; use core::convert::TryFrom; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -531,83 +531,84 @@ impl Inst { //============================================================================= // Instructions: get_regs -fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) { +fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandCollector<'_, F>) { + // This should match `AMode::with_allocs()`. match memarg { &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => { - collector.add_use(reg); + collector.reg_use(reg); } &AMode::RegReg(r1, r2, ..) | &AMode::RegScaled(r1, r2, ..) | &AMode::RegScaledExtended(r1, r2, ..) | &AMode::RegExtended(r1, r2, ..) => { - collector.add_use(r1); - collector.add_use(r2); + collector.reg_use(r1); + collector.reg_use(r2); } &AMode::Label(..) => {} &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => { - collector.add_mod(reg); - } - &AMode::FPOffset(..) => { - collector.add_use(fp_reg()); - } - &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => { - collector.add_use(stack_reg()); + collector.reg_mod(reg); } + &AMode::FPOffset(..) => {} + &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {} &AMode::RegOffset(r, ..) => { - collector.add_use(r); + collector.reg_use(r); } } } -fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) { +fn pairmemarg_operands<F: Fn(VReg) -> VReg>( + pairmemarg: &PairAMode, + collector: &mut OperandCollector<'_, F>, +) { + // This should match `PairAMode::with_allocs()`. match pairmemarg { &PairAMode::SignedOffset(reg, ..) => { - collector.add_use(reg); + collector.reg_use(reg); } &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..)
=> { - collector.add_mod(reg); + collector.reg_mod(reg); } } } -fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { +fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) { match inst { &Inst::AluRRR { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::AluRRRR { rd, rn, rm, ra, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); - collector.add_use(ra); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); + collector.reg_use(ra); } &Inst::AluRRImm12 { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::AluRRImmLogic { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::AluRRImmShift { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::AluRRRShift { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::AluRRRExtend { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::BitRR { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::ULoad8 { rd, ref mem, .. } | &Inst::SLoad8 { rd, ref mem, .. } @@ -616,145 +617,143 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { | &Inst::ULoad32 { rd, ref mem, .. } | &Inst::SLoad32 { rd, ref mem, .. } | &Inst::ULoad64 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::Store8 { rd, ref mem, .. } | &Inst::Store16 { rd, ref mem, .. } | &Inst::Store32 { rd, ref mem, .. } | &Inst::Store64 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::StoreP64 { rt, rt2, ref mem, .. } => { - collector.add_use(rt); - collector.add_use(rt2); - pairmemarg_regs(mem, collector); + collector.reg_use(rt); + collector.reg_use(rt2); + pairmemarg_operands(mem, collector); } &Inst::LoadP64 { rt, rt2, ref mem, .. } => { - collector.add_def(rt); - collector.add_def(rt2); - pairmemarg_regs(mem, collector); + collector.reg_def(rt); + collector.reg_def(rt2); + pairmemarg_operands(mem, collector); } &Inst::Mov64 { rd, rm } => { - collector.add_def(rd); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rm); } &Inst::Mov32 { rd, rm } => { - collector.add_def(rd); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rm); } &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::MovK { rd, .. } => { - collector.add_mod(rd); + collector.reg_mod(rd); } &Inst::CSel { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::CCmpImm { rn, ..
} => { - collector.add_use(rn); + collector.reg_use(rn); } - &Inst::AtomicRMWLoop { op, .. } => { - collector.add_use(xreg(25)); - collector.add_use(xreg(26)); - collector.add_def(writable_xreg(24)); - collector.add_def(writable_xreg(27)); - if op != AtomicRmwOp::Xchg { - collector.add_def(writable_xreg(28)); - } + &Inst::AtomicRMWLoop { .. } => { + collector.reg_use(xreg(25)); + collector.reg_use(xreg(26)); + collector.reg_def(writable_xreg(24)); + collector.reg_def(writable_xreg(27)); + collector.reg_def(writable_xreg(28)); } &Inst::AtomicRMW { rs, rt, rn, .. } => { - collector.add_use(rs); - collector.add_def(rt); - collector.add_use(rn); + collector.reg_use(rs); + collector.reg_def(rt); + collector.reg_use(rn); } &Inst::AtomicCAS { rs, rt, rn, .. } => { - collector.add_mod(rs); - collector.add_use(rt); - collector.add_use(rn); + collector.reg_mod(rs); + collector.reg_use(rt); + collector.reg_use(rn); } &Inst::AtomicCASLoop { .. } => { - collector.add_use(xreg(25)); - collector.add_use(xreg(26)); - collector.add_use(xreg(28)); - collector.add_def(writable_xreg(24)); - collector.add_def(writable_xreg(27)); + collector.reg_use(xreg(25)); + collector.reg_use(xreg(26)); + collector.reg_use(xreg(28)); + collector.reg_def(writable_xreg(24)); + collector.reg_def(writable_xreg(27)); } &Inst::LoadAcquire { rt, rn, .. } => { - collector.add_use(rn); - collector.add_def(rt); + collector.reg_use(rn); + collector.reg_def(rt); } &Inst::StoreRelease { rt, rn, .. } => { - collector.add_use(rn); - collector.add_use(rt); + collector.reg_use(rn); + collector.reg_use(rt); } &Inst::Fence {} => {} &Inst::FpuMove64 { rd, rn } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuMove128 { rd, rn } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuMoveFromVec { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuExtend { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuRR { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuRRR { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuRRI { fpu_op, rd, rn, .. } => { match fpu_op { - FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd), - FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd), + FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.reg_def(rd), + FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.reg_mod(rd), } - collector.add_use(rn); + collector.reg_use(rn); } &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); - collector.add_use(ra); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); + collector.reg_use(ra); } &Inst::VecMisc { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecLanes { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecShiftImm { rd, rn, .. 
} => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecExtract { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::VecTbl { rd, @@ -762,13 +761,13 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { rm, is_extension, } => { - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); if is_extension { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } } &Inst::VecTbl2 { @@ -778,1089 +777,258 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { rm, is_extension, } => { - collector.add_use(rn); - collector.add_use(rn2); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rn2); + collector.reg_use(rm); if is_extension { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } } &Inst::VecLoadReplicate { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecCSel { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuLoad32 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::FpuLoad64 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::FpuLoad128 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::FpuStore32 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::FpuStore64 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::FpuStore128 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::FpuLoadP64 { rt, rt2, ref mem, .. } => { - collector.add_def(rt); - collector.add_def(rt2); - pairmemarg_regs(mem, collector); + collector.reg_def(rt); + collector.reg_def(rt2); + pairmemarg_operands(mem, collector); } &Inst::FpuStoreP64 { rt, rt2, ref mem, .. } => { - collector.add_use(rt); - collector.add_use(rt2); - pairmemarg_regs(mem, collector); + collector.reg_use(rt); + collector.reg_use(rt2); + pairmemarg_operands(mem, collector); } &Inst::FpuLoadP128 { rt, rt2, ref mem, .. } => { - collector.add_def(rt); - collector.add_def(rt2); - pairmemarg_regs(mem, collector); + collector.reg_def(rt); + collector.reg_def(rt2); + pairmemarg_operands(mem, collector); } &Inst::FpuStoreP128 { rt, rt2, ref mem, .. } => { - collector.add_use(rt); - collector.add_use(rt2); - pairmemarg_regs(mem, collector); + collector.reg_use(rt); + collector.reg_use(rt2); + pairmemarg_operands(mem, collector); } &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. 
} => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::FpuToInt { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::IntToFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuRound { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::MovToFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuMoveFPImm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::MovToVec { rd, rn, .. } => { - collector.add_mod(rd); - collector.add_use(rn); + collector.reg_mod(rd); + collector.reg_use(rn); } &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecDup { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecDupFromFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecDupFPImm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::VecDupImm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::VecExtend { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecMovElement { rd, rn, .. } => { - collector.add_mod(rd); - collector.add_use(rn); + collector.reg_mod(rd); + collector.reg_use(rn); } &Inst::VecRRLong { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecRRNarrow { rd, rn, high_half, .. } => { - collector.add_use(rn); + collector.reg_use(rn); if high_half { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } } &Inst::VecRRPair { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecRRRLong { alu_op, rd, rn, rm, .. } => { match alu_op { VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { - collector.add_mod(rd) + collector.reg_mod(rd) } - _ => collector.add_def(rd), + _ => collector.reg_def(rd), }; - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::VecRRPairLong { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecRRR { alu_op, rd, rn, rm, .. } => { if alu_op == VecALUOp::Bsl { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::MovToNZCV { rn } => { - collector.add_use(rn); + collector.reg_use(rn); } &Inst::MovFromNZCV { rd } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::Extend { rd, rn, .. 
} => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } - &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {} + &Inst::Ret { ref rets } => { + for &ret in rets { + collector.reg_use(ret); + } + } + &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {} &Inst::Call { ref info, .. } => { - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); + collector.reg_uses(&info.uses[..]); + collector.reg_defs(&info.defs[..]); } &Inst::CallInd { ref info, .. } => { - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); - collector.add_use(info.rn); + collector.reg_use(info.rn); + collector.reg_uses(&info.uses[..]); + collector.reg_defs(&info.defs[..]); } &Inst::CondBr { ref kind, .. } => match kind { CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { - collector.add_use(*rt); + collector.reg_use(*rt); } CondBrKind::Cond(_) => {} }, &Inst::IndirectBr { rn, .. } => { - collector.add_use(rn); + collector.reg_use(rn); } &Inst::Nop0 | Inst::Nop4 => {} &Inst::Brk => {} &Inst::Udf { .. } => {} &Inst::TrapIf { ref kind, .. } => match kind { CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { - collector.add_use(*rt); + collector.reg_use(*rt); } CondBrKind::Cond(_) => {} }, &Inst::Adr { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::Word4 { .. } | &Inst::Word8 { .. } => {} &Inst::JTSequence { ridx, rtmp1, rtmp2, .. } => { - collector.add_use(ridx); - collector.add_def(rtmp1); - collector.add_def(rtmp2); + collector.reg_use(ridx); + collector.reg_early_def(rtmp1); + collector.reg_early_def(rtmp2); } &Inst::LoadExtName { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::LoadAddr { rd, ref mem } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::VirtualSPOffsetAdj { .. } => {} - &Inst::ValueLabelMarker { reg, .. } => { - collector.add_use(reg); - } &Inst::ElfTlsGetAddr { .. } => { for reg in AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV) { - collector.add_def(reg); + collector.reg_def(reg); } } &Inst::Unwind { .. } => {} &Inst::EmitIsland { .. } => {} - } -} - -//============================================================================= -// Instructions: map_regs - -pub fn aarch64_map_regs(inst: &mut Inst, mapper: &RM) { - fn map_mem(m: &RM, mem: &mut AMode) { - // N.B.: we take only the pre-map here, but this is OK because the - // only addressing modes that update registers (pre/post-increment on - // AArch64) both read and write registers, so they are "mods" rather - // than "defs", so must be the same in both the pre- and post-map. - match mem { - &mut AMode::Unscaled(ref mut reg, ..) => m.map_use(reg), - &mut AMode::UnsignedOffset(ref mut reg, ..) => m.map_use(reg), - &mut AMode::RegReg(ref mut r1, ref mut r2) - | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..) - | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..) - | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => { - m.map_use(r1); - m.map_use(r2); - } - &mut AMode::Label(..) => {} - &mut AMode::PreIndexed(ref mut r, ..) => m.map_mod(r), - &mut AMode::PostIndexed(ref mut r, ..) => m.map_mod(r), - &mut AMode::FPOffset(..) - | &mut AMode::SPOffset(..) - | &mut AMode::NominalSPOffset(..) => {} - &mut AMode::RegOffset(ref mut r, ..) => m.map_use(r), - }; - } - - fn map_pairmem(m: &RM, mem: &mut PairAMode) { - match mem { - &mut PairAMode::SignedOffset(ref mut reg, ..) 
=> m.map_use(reg), - &mut PairAMode::PreIndexed(ref mut reg, ..) => m.map_def(reg), - &mut PairAMode::PostIndexed(ref mut reg, ..) => m.map_def(reg), + &Inst::DummyUse { reg } => { + collector.reg_use(reg); } } - - fn map_br(m: &RM, br: &mut CondBrKind) { - match br { - &mut CondBrKind::Zero(ref mut reg) => m.map_use(reg), - &mut CondBrKind::NotZero(ref mut reg) => m.map_use(reg), - &mut CondBrKind::Cond(..) => {} - }; - } - - match inst { - &mut Inst::AluRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::AluRRRR { - ref mut rd, - ref mut rn, - ref mut rm, - ref mut ra, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - mapper.map_use(ra); - } - &mut Inst::AluRRImm12 { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRRImmLogic { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRRImmShift { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRRRShift { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::AluRRRExtend { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::BitRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::ULoad8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::SLoad8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::ULoad16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::SLoad16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::ULoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::SLoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - - &mut Inst::ULoad64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Store8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - - &mut Inst::StoreP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::LoadP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_def(rt); - mapper.map_def(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::Mov64 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::Mov32 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::MovZ { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::MovN { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::MovK { ref mut rd, .. 
} => { - mapper.map_def(rd); - } - &mut Inst::CSel { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::CSet { ref mut rd, .. } | &mut Inst::CSetm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::CCmpImm { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::AtomicRMWLoop { .. } => { - // There are no vregs to map in this insn. - } - &mut Inst::AtomicRMW { - ref mut rs, - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_use(rs); - mapper.map_def(rt); - mapper.map_use(rn); - } - &mut Inst::AtomicCAS { - ref mut rs, - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_mod(rs); - mapper.map_use(rt); - mapper.map_use(rn); - } - &mut Inst::AtomicCASLoop { .. } => { - // There are no vregs to map in this insn. - } - &mut Inst::LoadAcquire { - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_def(rt); - mapper.map_use(rn); - } - &mut Inst::StoreRelease { - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rn); - } - &mut Inst::Fence {} => {} - &mut Inst::FpuMove64 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMove128 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMoveFromVec { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuExtend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuRRI { - fpu_op, - ref mut rd, - ref mut rn, - .. - } => { - match fpu_op { - FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => mapper.map_def(rd), - FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => mapper.map_mod(rd), - } - mapper.map_use(rn); - } - &mut Inst::FpuRRRR { - ref mut rd, - ref mut rn, - ref mut rm, - ref mut ra, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - mapper.map_use(ra); - } - &mut Inst::VecMisc { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecLanes { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecShiftImm { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecExtract { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::VecTbl { - ref mut rd, - ref mut rn, - ref mut rm, - is_extension, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - - if is_extension { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - } - &mut Inst::VecTbl2 { - ref mut rd, - ref mut rn, - ref mut rn2, - ref mut rm, - is_extension, - } => { - mapper.map_use(rn); - mapper.map_use(rn2); - mapper.map_use(rm); - - if is_extension { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - } - &mut Inst::VecLoadReplicate { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecCSel { - ref mut rd, - ref mut rn, - ref mut rm, - .. 
- } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp32 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp64 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuLoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoad64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoad128 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore128 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoadP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_def(rt); - mapper.map_def(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::FpuStoreP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::FpuLoadP128 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_def(rt); - mapper.map_def(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::FpuStoreP128 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadFpuConst128 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::FpuToInt { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::IntToFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuCSel32 { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCSel64 { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuRound { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::MovToFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMoveFPImm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::MovToVec { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - } - &mut Inst::MovFromVec { - ref mut rd, - ref mut rn, - .. - } - | &mut Inst::MovFromVecSigned { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecDup { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecDupFromFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecDupFPImm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::VecDupImm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::VecExtend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecMovElement { - ref mut rd, - ref mut rn, - .. 
- } => { - mapper.map_mod(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRLong { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRNarrow { - ref mut rd, - ref mut rn, - high_half, - .. - } => { - mapper.map_use(rn); - - if high_half { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - } - &mut Inst::VecRRPair { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRRLong { - alu_op, - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - match alu_op { - VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { - mapper.map_mod(rd) - } - _ => mapper.map_def(rd), - }; - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::VecRRPairLong { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRR { - alu_op, - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - if alu_op == VecALUOp::Bsl { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::MovToNZCV { ref mut rn } => { - mapper.map_use(rn); - } - &mut Inst::MovFromNZCV { ref mut rd } => { - mapper.map_def(rd); - } - &mut Inst::Extend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::Jump { .. } => {} - &mut Inst::Call { ref mut info } => { - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - } - &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {} - &mut Inst::CallInd { ref mut info, .. } => { - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - mapper.map_use(&mut info.rn); - } - &mut Inst::CondBr { ref mut kind, .. } => { - map_br(mapper, kind); - } - &mut Inst::IndirectBr { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {} - &mut Inst::TrapIf { ref mut kind, .. } => { - map_br(mapper, kind); - } - &mut Inst::Adr { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {} - &mut Inst::JTSequence { - ref mut ridx, - ref mut rtmp1, - ref mut rtmp2, - .. - } => { - mapper.map_use(ridx); - mapper.map_def(rtmp1); - mapper.map_def(rtmp2); - } - &mut Inst::LoadExtName { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadAddr { - ref mut rd, - ref mut mem, - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::VirtualSPOffsetAdj { .. } => {} - &mut Inst::EmitIsland { .. } => {} - &mut Inst::ElfTlsGetAddr { .. } => {} - &mut Inst::ValueLabelMarker { ref mut reg, .. } => { - mapper.map_use(reg); - } - &mut Inst::Unwind { .. 
} => {} - } } //============================================================================= @@ -1869,12 +1037,8 @@ pub fn aarch64_map_regs(inst: &mut Inst, mapper: &RM) { impl MachInst for Inst { type LabelUse = LabelUse; - fn get_regs(&self, collector: &mut RegUsageCollector) { - aarch64_get_regs(self, collector) - } - - fn map_regs(&mut self, mapper: &RM) { - aarch64_map_regs(self, mapper); + fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) { + aarch64_get_operands(self, collector); } fn is_move(&self) -> Option<(Writable<Reg>, Reg)> { @@ -1914,7 +1078,7 @@ impl MachInst for Inst { fn is_term<'a>(&'a self) -> MachTerminator<'a> { match self { - &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret, + &Inst::Ret { .. } | &Inst::EpiloguePlaceholder => MachTerminator::Ret, &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()), &Inst::CondBr { taken, not_taken, .. @@ -1931,27 +1095,35 @@ impl MachInst for Inst { let bits = ty.bits(); assert!(bits <= 128); - assert!(to_reg.to_reg().get_class() == from_reg.get_class()); - - if from_reg.get_class() == RegClass::I64 { - Inst::Mov64 { + assert!(to_reg.to_reg().class() == from_reg.class()); + match from_reg.class() { + RegClass::Int => Inst::Mov64 { rd: to_reg, rm: from_reg, - } - } else if from_reg.get_class() == RegClass::V128 { - if bits > 64 { - Inst::FpuMove128 { - rd: to_reg, - rn: from_reg, - } - } else { - Inst::FpuMove64 { - rd: to_reg, - rn: from_reg, + }, + RegClass::Float => { + if bits > 64 { + Inst::FpuMove128 { + rd: to_reg, + rn: from_reg, + } + } else { + Inst::FpuMove64 { + rd: to_reg, + rn: from_reg, + } } } - } else { - panic!("Unexpected register class: {:?}", from_reg.get_class()); + } + } + + fn is_safepoint(&self) -> bool { + match self { + &Inst::Call { .. } | &Inst::CallInd { .. } | &Inst::TrapIf { .. } | &Inst::Udf { ..
} => true, + _ => false, } } @@ -1973,6 +1145,10 @@ impl MachInst for Inst { } } + fn gen_dummy_use(reg: Reg) -> Inst { + Inst::DummyUse { reg } + } + fn gen_nop(preferred_size: usize) -> Inst { if preferred_size == 0 { return Inst::Nop0; @@ -1982,32 +1158,28 @@ impl MachInst for Inst { Inst::Nop4 } - fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option { - None - } - fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { match ty { - I8 => Ok((&[RegClass::I64], &[I8])), - I16 => Ok((&[RegClass::I64], &[I16])), - I32 => Ok((&[RegClass::I64], &[I32])), - I64 => Ok((&[RegClass::I64], &[I64])), - B1 => Ok((&[RegClass::I64], &[B1])), - B8 => Ok((&[RegClass::I64], &[B8])), - B16 => Ok((&[RegClass::I64], &[B16])), - B32 => Ok((&[RegClass::I64], &[B32])), - B64 => Ok((&[RegClass::I64], &[B64])), + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + B1 => Ok((&[RegClass::Int], &[B1])), + B8 => Ok((&[RegClass::Int], &[B8])), + B16 => Ok((&[RegClass::Int], &[B16])), + B32 => Ok((&[RegClass::Int], &[B32])), + B64 => Ok((&[RegClass::Int], &[B64])), R32 => panic!("32-bit reftype pointer should never be seen on AArch64"), - R64 => Ok((&[RegClass::I64], &[R64])), - F32 => Ok((&[RegClass::V128], &[F32])), - F64 => Ok((&[RegClass::V128], &[F64])), - I128 => Ok((&[RegClass::I64, RegClass::I64], &[I64, I64])), - B128 => Ok((&[RegClass::I64, RegClass::I64], &[B64, B64])), + R64 => Ok((&[RegClass::Int], &[R64])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + B128 => Ok((&[RegClass::Int, RegClass::Int], &[B64, B64])), _ if ty.is_vector() => { assert!(ty.bits() <= 128); - Ok((&[RegClass::V128], &[I8X16])) + Ok((&[RegClass::Float], &[I8X16])) } - IFLAGS | FFLAGS => Ok((&[RegClass::I64], &[I64])), + IFLAGS | FFLAGS => Ok((&[RegClass::Int], &[I64])), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty @@ -2015,6 +1187,13 @@ impl MachInst for Inst { } } + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + RegClass::Float => types::I8X16, + RegClass::Int => types::I64, + } + } + fn gen_jump(target: MachLabel) -> Inst { Inst::Jump { dest: BranchTarget::Label(target), @@ -2033,33 +1212,20 @@ impl MachInst for Inst { } fn ref_type_regclass(_: &settings::Flags) -> RegClass { - RegClass::I64 - } - - fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self { - Inst::ValueLabelMarker { label, reg } - } - - fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { - match self { - Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)), - _ => None, - } + RegClass::Int } } //============================================================================= // Pretty-printing of instructions. 
-fn mem_finalize_for_show( - mem: &AMode, - mb_rru: Option<&RealRegUniverse>, - state: &EmitState, -) -> (String, AMode) { +fn mem_finalize_for_show(mem: &AMode, state: &EmitState) -> (String, AMode) { let (mem_insts, mem) = mem_finalize(0, mem, state); let mut mem_str = mem_insts .into_iter() - .map(|inst| inst.show_rru(mb_rru)) + .map(|inst| { + inst.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])) + }) .collect::<Vec<_>>() .join(" ; "); if !mem_str.is_empty() { @@ -2069,14 +1235,10 @@ fn mem_finalize_for_show( (mem_str, mem) } -impl PrettyPrint for Inst { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.pretty_print(mb_rru, &mut EmitState::default()) - } -} - impl Inst { - fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + fn print_with_state(&self, state: &mut EmitState, allocs: &mut AllocationConsumer) -> String { + let mut empty_allocs = AllocationConsumer::default(); + fn op_name(alu_op: ALUOp) -> &'static str { match alu_op { ALUOp::Add => "add", @@ -2105,6 +1267,9 @@ impl Inst { } } + // N.B.: order of `allocs` consumption (via register + // pretty-printing or memarg.with_allocs()) needs to match the + // order in `aarch64_get_operands` above. match self { &Inst::Nop0 => "nop-zero-len".to_string(), &Inst::Nop4 => "nop".to_string(), @@ -2116,9 +1281,9 @@ impl Inst { rm, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::AluRRRR { @@ -2134,10 +1299,10 @@ impl Inst { ALUOp3::MSub32 => ("msub", OperandSize::Size32), ALUOp3::MSub64 => ("msub", OperandSize::Size64), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); - let ra = show_ireg_sized(ra, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); + let ra = pretty_print_ireg(ra, size, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } @@ -2149,14 +1314,14 @@ impl Inst { ref imm12, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() { // special-case MOV (used for moving into SP).
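// The `mov` printed just below is the architectural alias: on AArch64 a
// plain register-to-register `mov xd, xm` is an alias of `orr xd, xzr, xm`,
// and register number 31 in an ORR encodes XZR rather than SP. Moves
// involving the stack pointer are therefore encoded as a zero-immediate ADD,
// which disassemblers likewise display as `mov`. For example (assembly,
// illustrative):
//
//     add sp, x27, #0    // shown by a disassembler as: mov sp, x27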
format!("mov {}, {}", rd, rn) } else { - let imm12 = imm12.show_rru(mb_rru); + let imm12 = imm12.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, imm12) } } @@ -2168,9 +1333,9 @@ impl Inst { ref imml, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let imml = imml.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let imml = imml.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, imml) } &Inst::AluRRImmShift { @@ -2181,9 +1346,9 @@ impl Inst { ref immshift, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let immshift = immshift.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let immshift = immshift.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, immshift) } &Inst::AluRRRShift { @@ -2195,10 +1360,10 @@ impl Inst { ref shiftop, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); - let shiftop = shiftop.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); + let shiftop = shiftop.pretty_print(0, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop) } &Inst::AluRRRExtend { @@ -2210,16 +1375,16 @@ impl Inst { ref extendop, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); - let extendop = extendop.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); + let extendop = extendop.pretty_print(0, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop) } &Inst::BitRR { op, size, rd, rn } => { let op = op.op_str(); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::ULoad8 { rd, ref mem, .. } @@ -2229,8 +1394,6 @@ impl Inst { | &Inst::ULoad32 { rd, ref mem, .. } | &Inst::SLoad32 { rd, ref mem, .. } | &Inst::ULoad64 { rd, ref mem, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let is_unscaled = match &mem { &AMode::Unscaled(..) => true, _ => false, @@ -2252,16 +1415,18 @@ impl Inst { (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64), _ => unreachable!(), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let mem = mem.show_rru(mb_rru); + + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::Store8 { rd, ref mem, .. } | &Inst::Store16 { rd, ref mem, .. } | &Inst::Store32 { rd, ref mem, .. } | &Inst::Store64 { rd, ref mem, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let is_unscaled = match &mem { &AMode::Unscaled(..) 
=> true, _ => false, @@ -2277,66 +1442,72 @@ impl Inst { (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64), _ => unreachable!(), }; - let rd = show_ireg_sized(rd, mb_rru, size); - let mem = mem.show_rru(mb_rru); + + let rd = pretty_print_ireg(rd, size, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::StoreP64 { rt, rt2, ref mem, .. } => { - let rt = rt.show_rru(mb_rru); - let rt2 = rt2.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_ireg(rt, OperandSize::Size64, allocs); + let rt2 = pretty_print_ireg(rt2, OperandSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::LoadP64 { rt, rt2, ref mem, .. } => { - let rt = rt.to_reg().show_rru(mb_rru); - let rt2 = rt2.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64, allocs); + let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::Mov64 { rd, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); format!("mov {}, {}", rd, rm) } &Inst::Mov32 { rd, rm } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); - let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); + let rm = pretty_print_ireg(rm, OperandSize::Size32, allocs); format!("mov {}, {}", rd, rm) } &Inst::MovZ { rd, ref imm, size } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let imm = imm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let imm = imm.pretty_print(0, allocs); format!("movz {}, {}", rd, imm) } &Inst::MovN { rd, ref imm, size } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let imm = imm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let imm = imm.pretty_print(0, allocs); format!("movn {}, {}", rd, imm) } &Inst::MovK { rd, ref imm, size } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let imm = imm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let imm = imm.pretty_print(0, allocs); format!("movk {}, {}", rd, imm) } &Inst::CSel { rd, rn, rm, cond } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); + let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("csel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::CSet { rd, cond } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("cset {}, {}", rd, cond) } &Inst::CSetm { rd, cond } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), 
OperandSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("csetm {}, {}", rd, cond) } &Inst::CCmpImm { @@ -2346,10 +1517,10 @@ impl Inst { nzcv, cond, } => { - let rn = show_ireg_sized(rn, mb_rru, size); - let imm = imm.show_rru(mb_rru); - let nzcv = nzcv.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_ireg(rn, size, allocs); + let imm = imm.pretty_print(0, allocs); + let nzcv = nzcv.pretty_print(0, allocs); + let cond = cond.pretty_print(0, allocs); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } &Inst::AtomicRMW { rs, rt, rn, ty, op } => { @@ -2365,9 +1536,9 @@ impl Inst { }; let size = OperandSize::from_ty(ty); - let rs = show_ireg_sized(rs, mb_rru, size); - let rt = show_ireg_sized(rt.to_reg(), mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rs = pretty_print_ireg(rs, size, allocs); + let rt = pretty_print_ireg(rt.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let ty_suffix = match ty { I8 => "b", @@ -2383,13 +1554,14 @@ impl Inst { _ => "", }; let size = OperandSize::from_ty(ty); - let r_status = show_ireg_sized(xreg(24), mb_rru, OperandSize::Size32); - let r_arg2 = show_ireg_sized(xreg(26), mb_rru, size); - let r_tmp = show_ireg_sized(xreg(27), mb_rru, size); - let mut r_dst = show_ireg_sized(xreg(28), mb_rru, size); + let r_addr = pretty_print_ireg(xreg(25), OperandSize::Size64, allocs); + let r_arg2 = pretty_print_ireg(xreg(26), size, allocs); + let r_status = pretty_print_ireg(xreg(24), OperandSize::Size32, allocs); + let r_tmp = pretty_print_ireg(xreg(27), size, allocs); + let mut r_dst = pretty_print_ireg(xreg(28), size, allocs); let mut loop_str: String = "1: ".to_string(); - loop_str.push_str(&format!("ldaxr{} {}, [x25]; ", ty_suffix, r_tmp)); + loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr)); let op_str = match op { inst_common::AtomicRmwOp::Add => "add", @@ -2426,8 +1598,8 @@ impl Inst { loop_str.push_str(&format!("{} {}, {}, {}; ", op_str, r_dst, r_tmp, r_arg2)); } loop_str.push_str(&format!( - "stlxr{} {}, {}, [x25]; ", - ty_suffix, r_status, r_dst + "stlxr{} {}, {}, [{}]; ", + ty_suffix, r_status, r_dst, r_addr )); loop_str.push_str(&format!("cbnz {}, 1b", r_status)); loop_str @@ -2440,9 +1612,9 @@ impl Inst { _ => panic!("Unsupported type: {}", ty), }; let size = OperandSize::from_ty(ty); - let rs = show_ireg_sized(rs.to_reg(), mb_rru, size); - let rt = show_ireg_sized(rt, mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rs = pretty_print_ireg(rs.to_reg(), size, allocs); + let rt = pretty_print_ireg(rt, size, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); format!("{} {}, {}, [{}]", op, rs, rt, rn) } @@ -2462,8 +1634,8 @@ impl Inst { _ => panic!("Unsupported type: {}", access_ty), }; let size = OperandSize::from_ty(ty); - let rt = show_ireg_sized(rt.to_reg(), mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); + let rt = pretty_print_ireg(rt.to_reg(), size, allocs); format!("{} {}, [{}]", op, rt, rn) } &Inst::StoreRelease { @@ -2477,32 +1649,31 @@ impl Inst { _ => panic!("Unsupported type: {}", access_ty), }; let size = OperandSize::from_ty(ty); - let rt = show_ireg_sized(rt, mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); + let rt = pretty_print_ireg(rt, size, allocs); format!("{} {}, [{}]", op, rt, rn) } &Inst::Fence {} => { format!("dmb ish") } &Inst::FpuMove64 { rd, rn } => { - let rd = 
show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuMove128 { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("mov {}.16b, {}.16b", rd, rn) } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); - let rn = show_vreg_element(rn, mb_rru, idx, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size(), allocs); + let rn = pretty_print_vreg_element(rn, idx as usize, size, allocs); format!("mov {}, {}", rd, rn) } &Inst::FpuExtend { rd, rn, size } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); - + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuRR { fpu_op, rd, rn } => { @@ -2516,8 +1687,8 @@ impl Inst { FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64), FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest); - let rn = show_vreg_scalar(rn, mb_rru, sizesrc); + let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest, allocs); + let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuRRR { fpu_op, rd, rn, rm } => { @@ -2539,26 +1710,30 @@ impl Inst { FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64), FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); - let rm = show_vreg_scalar(rm, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); + let rm = pretty_print_vreg_scalar(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::FpuRRI { fpu_op, rd, rn } => { let (op, imm, vector) = match fpu_op { - FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true), - FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false), - FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true), - FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false), + FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0, allocs), true), + FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0, allocs), false), + FPUOpRI::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true), + FPUOpRI::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false), }; - let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector { - |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2) + let (rd, rn) = if vector { + ( + pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2, allocs), + pretty_print_vreg_vector(rn, VectorSize::Size32x2, allocs), + ) } else { - |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64) + ( + pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs), + pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs), + ) }; - let rd = show_vreg_fn(rd.to_reg(), mb_rru); - let rn = show_vreg_fn(rn, mb_rru); format!("{} {}, {}, {}", op, rd, rn, imm) } &Inst::FpuRRRR { @@ -2572,98 +1747,108 @@ impl Inst { 
FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32), FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); - let rm = show_vreg_scalar(rm, mb_rru, size); - let ra = show_vreg_scalar(ra, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); + let rm = pretty_print_vreg_scalar(rm, size, allocs); + let ra = pretty_print_vreg_scalar(ra, size, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } &Inst::FpuCmp32 { rn, rm } => { - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32, allocs); format!("fcmp {}, {}", rn, rm) } &Inst::FpuCmp64 { rn, rm } => { - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64, allocs); format!("fcmp {}, {}", rn, rm) } &Inst::FpuLoad32 { rd, ref mem, .. } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad64 { rd, ref mem, .. } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad128 { rd, ref mem, .. } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuStore32 { rd, ref mem, .. } => { - let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore64 { rd, ref mem, .. 
} => { - let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore128 { rd, ref mem, .. } => { - let rd = rd.show_rru(mb_rru); + let rd = pretty_print_reg(rd, allocs); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuLoadP64 { rt, rt2, ref mem, .. } => { - let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64); - let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64, allocs); + let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::FpuStoreP64 { rt, rt2, ref mem, .. } => { - let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64); - let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64, allocs); + let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::FpuLoadP128 { rt, rt2, ref mem, .. } => { - let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128); - let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128, allocs); + let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::FpuStoreP128 { rt, rt2, ref mem, .. 
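// The load/store arms above all follow the same new pattern: first substitute
// regalloc2's chosen registers into the addressing mode, then legalize the
// result for display. A minimal sketch of what `AMode::with_allocs` could look
// like under that assumption (`RegExtended` appears later in this patch;
// `RegReg` is assumed here for illustration; the real impl lives in inst/args.rs):
//
//     fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> AMode {
//         match self {
//             &AMode::RegReg(rn, rm) => AMode::RegReg(allocs.next(rn), allocs.next(rm)),
//             &AMode::RegExtended(rn, rm, op) => {
//                 AMode::RegExtended(allocs.next(rn), allocs.next(rm), op)
//             }
//             // ...each remaining variant maps its register fields through
//             // `allocs.next` in the order they were handed to the operand
//             // collector, so the allocation stream stays in sync.
//             _ => self.clone(), // assuming AMode: Clone for register-free variants
//         }
//     }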
} => { - let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128); - let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128, allocs); + let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); format!( "ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, @@ -2671,7 +1856,7 @@ impl Inst { ) } &Inst::LoadFpuConst128 { rd, const_data } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size128, allocs); format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data) } &Inst::FpuToInt { op, rd, rn } => { @@ -2685,8 +1870,8 @@ impl Inst { FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64), FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest); - let rn = show_vreg_scalar(rn, mb_rru, sizesrc); + let rd = pretty_print_ireg(rd.to_reg(), sizedest, allocs); + let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::IntToFpu { op, rd, rn } => { @@ -2700,22 +1885,22 @@ impl Inst { IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64), IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest); - let rn = show_ireg_sized(rn, mb_rru, sizesrc); + let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest, allocs); + let rn = pretty_print_ireg(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuCSel32 { rd, rn, rm, cond } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32); - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32, allocs); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32, allocs); + let cond = cond.pretty_print(0, allocs); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuCSel64 { rd, rn, rm, cond } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuRound { op, rd, rn } => { @@ -2729,25 +1914,25 @@ impl Inst { FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32), FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); format!("{} {}, 
{}", inst, rd, rn) } &Inst::MovToFpu { rd, rn, size } => { let operand_size = size.operand_size(); - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, operand_size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, operand_size, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuMoveFPImm { rd, imm, size } => { - let imm = imm.show_rru(mb_rru); - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); + let imm = imm.pretty_print(0, allocs); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); format!("fmov {}, {}", rd, imm) } &Inst::MovToVec { rd, rn, idx, size } => { - let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size); - let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); + let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size, allocs); + let rn = pretty_print_ireg(rn, size.operand_size(), allocs); format!("mov {}, {}", rd, rn) } &Inst::MovFromVec { rd, rn, idx, size } => { @@ -2758,8 +1943,8 @@ impl Inst { VectorSize::Size64x2 => "mov", _ => unimplemented!(), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size()); - let rn = show_vreg_element(rn, mb_rru, idx, size); + let rd = pretty_print_ireg(rd.to_reg(), size.operand_size(), allocs); + let rn = pretty_print_vreg_element(rn, idx as usize, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::MovFromVecSigned { @@ -2769,23 +1954,23 @@ impl Inst { size, scalar_size, } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size); - let rn = show_vreg_element(rn, mb_rru, idx, size); + let rd = pretty_print_ireg(rd.to_reg(), scalar_size, allocs); + let rn = pretty_print_vreg_element(rn, idx as usize, size, allocs); format!("smov {}, {}", rd, rn) } &Inst::VecDup { rd, rn, size } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size.operand_size(), allocs); format!("dup {}, {}", rd, rn) } &Inst::VecDupFromFpu { rd, rn, size } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_element(rn, mb_rru, 0, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_element(rn, 0, size, allocs); format!("dup {}, {}", rd, rn) } &Inst::VecDupFPImm { rd, imm, size } => { - let imm = imm.show_rru(mb_rru); - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let imm = imm.pretty_print(0, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); format!("fmov {}, {}", rd, imm) } @@ -2795,9 +1980,9 @@ impl Inst { invert, size, } => { - let imm = imm.show_rru(mb_rru); + let imm = imm.pretty_print(0, allocs); let op = if invert { "mvni" } else { "movi" }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); format!("{} {}, {}", op, rd, imm) } @@ -2845,8 +2030,8 @@ impl Inst { ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); - let rn = show_vreg_vector(rn, mb_rru, src); + let rd = pretty_print_vreg_vector(rd.to_reg(), dest, allocs); + let rn = pretty_print_vreg_vector(rn, src, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecMovElement { @@ -2856,8 +2041,8 @@ impl Inst { src_idx, size, } => { - let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size); - let rn = show_vreg_element(rn, mb_rru, src_idx, size); + 
let rd = pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size, allocs); + let rn = pretty_print_vreg_element(rn, src_idx as usize, size, allocs); format!("mov {}, {}", rd, rn) } &Inst::VecRRLong { @@ -2898,8 +2083,8 @@ impl Inst { ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32") } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}{}", op, rd, rn, suffix) } @@ -2995,8 +2180,8 @@ impl Inst { ("fcvtn2", VectorSize::Size32x4, VectorSize::Size64x2) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rn = pretty_print_vreg_vector(rn, size, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); format!("{} {}, {}", op, rd, rn) } @@ -3004,8 +2189,8 @@ impl Inst { let op = match op { VecPairOp::Addp => "addp", }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2, allocs); format!("{} {}, {}", op, rd, rn) } @@ -3024,8 +2209,8 @@ impl Inst { ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); - let rn = show_vreg_vector(rn, mb_rru, src); + let rd = pretty_print_vreg_vector(rd.to_reg(), dest, allocs); + let rn = pretty_print_vreg_vector(rn, src, allocs); format!("{} {}, {}", op, rd, rn) } @@ -3075,9 +2260,9 @@ impl Inst { VecALUOp::Zip1 => ("zip1", size), VecALUOp::Sqrdmulh => ("sqrdmulh", size), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, size); - let rm = show_vreg_vector(rm, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); + let rm = pretty_print_vreg_vector(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecRRRLong { @@ -3143,9 +2328,9 @@ impl Inst { ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size); - let rn = show_vreg_vector(rn, mb_rru, src_size); - let rm = show_vreg_vector(rm, mb_rru, src_size); + let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs); + let rn = pretty_print_vreg_vector(rn, src_size, allocs); + let rm = pretty_print_vreg_vector(rm, src_size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecMisc { op, rd, rn, size } => { @@ -3185,8 +2370,8 @@ impl Inst { VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"), VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}{}", op, rd, rn, suffix) } &Inst::VecLanes { op, rd, rn, size } => { @@ -3194,8 +2379,8 @@ impl Inst { VecLanesOp::Uminv => "uminv", VecLanesOp::Addv => "addv", }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size(), allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecShiftImm { 
@@ -3210,14 +2395,14 @@ impl Inst { VecShiftImmOp::Ushr => "ushr", VecShiftImmOp::Sshr => "sshr", }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}, #{}", op, rd, rn, imm) } &Inst::VecExtract { rd, rn, rm, imm4 } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4) } &Inst::VecTbl { @@ -3227,9 +2412,9 @@ impl Inst { is_extension, } => { let op = if is_extension { "tbx" } else { "tbl" }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); format!("{} {}, {{ {} }}, {}", op, rd, rn, rm) } &Inst::VecTbl2 { @@ -3240,34 +2425,34 @@ impl Inst { is_extension, } => { let op = if is_extension { "tbx" } else { "tbl" }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm) } &Inst::VecLoadReplicate { rd, rn, size, .. } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_reg(rn, allocs); format!("ld1r {{ {} }}, [{}]", rd, rn) } &Inst::VecCSel { rd, rn, rm, cond } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let cond = cond.pretty_print(0, allocs); format!( "vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond ) } &Inst::MovToNZCV { rn } => { - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("msr nzcv, {}", rn) } &Inst::MovFromNZCV { rd } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("mrs {}, nzcv", rd) } &Inst::Extend { @@ -3277,8 +2462,8 @@ impl Inst { from_bits: 1, .. 
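// Note the deliberate reordering in the fcvtn and tbl/tbx arms above: uses
// (`rn`, `rn2`, `rm`) are now printed before the def (`rd`), because
// `AllocationConsumer` replays regalloc2's allocations in exactly the order
// the operands were collected. A hedged sketch of the matching collection
// step for `VecTbl` (helper names as in cranelift's OperandCollector):
//
//     &Inst::VecTbl { rd, rn, rm, .. } => {
//         collector.reg_use(rn); // use #0 -- printed first
//         collector.reg_use(rm); // use #1
//         collector.reg_def(rd); // def  -- printed last
//     }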
} => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size32, allocs); format!("and {}, {}, #1", rd, rn) } &Inst::Extend { @@ -3291,8 +2476,8 @@ impl Inst { // The case of a zero extension from 32 to 64 bits, is implemented // with a "mov" to a 32-bit (W-reg) dest, because this zeroes // the top 32 bits. - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size32, allocs); format!("mov {}, {}", rd, rn) } &Inst::Extend { @@ -3314,8 +2499,8 @@ impl Inst { }; if op == "sbfx" || op == "ubfx" { let dest_size = OperandSize::from_bits(to_bits); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); - let rn = show_ireg_sized(rn, mb_rru, dest_size); + let rd = pretty_print_ireg(rd.to_reg(), dest_size, allocs); + let rn = pretty_print_ireg(rn, dest_size, allocs); format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits) } else { let dest_size = if signed { @@ -3323,20 +2508,20 @@ impl Inst { } else { OperandSize::Size32 }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits)); + let rd = pretty_print_ireg(rd.to_reg(), dest_size, allocs); + let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits), allocs); format!("{} {}, {}", op, rd, rn) } } &Inst::Call { .. } => format!("bl 0"), &Inst::CallInd { ref info, .. } => { - let rn = info.rn.show_rru(mb_rru); + let rn = pretty_print_reg(info.rn, allocs); format!("blr {}", rn) } - &Inst::Ret => "ret".to_string(), + &Inst::Ret { .. } => "ret".to_string(), &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), &Inst::Jump { ref dest } => { - let dest = dest.show_rru(mb_rru); + let dest = dest.pretty_print(0, allocs); format!("b {}", dest) } &Inst::CondBr { @@ -3344,45 +2529,45 @@ impl Inst { ref not_taken, ref kind, } => { - let taken = taken.show_rru(mb_rru); - let not_taken = not_taken.show_rru(mb_rru); + let taken = taken.pretty_print(0, allocs); + let not_taken = not_taken.pretty_print(0, allocs); match kind { &CondBrKind::Zero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbz {}, {} ; b {}", reg, taken, not_taken) } &CondBrKind::NotZero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbnz {}, {} ; b {}", reg, taken, not_taken) } &CondBrKind::Cond(c) => { - let c = c.show_rru(mb_rru); + let c = c.pretty_print(0, allocs); format!("b.{} {} ; b {}", c, taken, not_taken) } } } &Inst::IndirectBr { rn, .. } => { - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("br {}", rn) } &Inst::Brk => "brk #0".to_string(), &Inst::Udf { .. } => "udf".to_string(), &Inst::TrapIf { ref kind, .. 
} => match kind { &CondBrKind::Zero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbnz {}, 8 ; udf", reg) } &CondBrKind::NotZero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbz {}, 8 ; udf", reg) } &CondBrKind::Cond(c) => { - let c = c.invert().show_rru(mb_rru); + let c = c.invert().pretty_print(0, allocs); format!("b.{} 8 ; udf", c) } }, &Inst::Adr { rd, off } => { - let rd = rd.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("adr {}, pc+{}", rd, off) } &Inst::Word4 { data } => format!("data.i32 {}", data), @@ -3394,10 +2579,10 @@ impl Inst { rtmp2, .. } => { - let ridx = ridx.show_rru(mb_rru); - let rtmp1 = rtmp1.show_rru(mb_rru); - let rtmp2 = rtmp2.show_rru(mb_rru); - let default_target = info.default_target.show_rru(mb_rru); + let ridx = pretty_print_reg(ridx, allocs); + let rtmp1 = pretty_print_reg(rtmp1.to_reg(), allocs); + let rtmp2 = pretty_print_reg(rtmp2.to_reg(), allocs); + let default_target = info.default_target.pretty_print(0, allocs); format!( concat!( "b.hs {} ; ", @@ -3424,7 +2609,7 @@ impl Inst { ref name, offset, } => { - let rd = rd.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) } &Inst::LoadAddr { rd, ref mem } => { @@ -3432,10 +2617,14 @@ impl Inst { // this logic between `emit()` and `show_rru()` -- a separate 1-to-N // expansion stage (i.e., legalization, but without the slow edit-in-place // of the existing legalization framework). - let (mem_insts, mem) = mem_finalize(0, mem, state); + let rd = allocs.next_writable(rd); + let mem = mem.with_allocs(allocs); + let (mem_insts, mem) = mem_finalize(0, &mem, state); let mut ret = String::new(); for inst in mem_insts.into_iter() { - ret.push_str(&inst.show_rru(mb_rru)); + ret.push_str( + &inst.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } let (reg, index_reg, offset) = match mem { AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0), @@ -3462,10 +2651,14 @@ impl Inst { extendop, }; - ret.push_str(&add.show_rru(mb_rru)); + ret.push_str( + &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } else if offset == 0 { let mov = Inst::gen_move(rd, reg, I64); - ret.push_str(&mov.show_rru(mb_rru)); + ret.push_str( + &mov.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { let add = Inst::AluRRImm12 { alu_op, @@ -3474,11 +2667,15 @@ impl Inst { rn: reg, imm12, }; - ret.push_str(&add.show_rru(mb_rru)); + ret.push_str( + &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } else { let tmp = writable_spilltmp_reg(); for inst in Inst::load_constant(tmp, abs_offset).into_iter() { - ret.push_str(&inst.show_rru(mb_rru)); + ret.push_str( + &inst.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } let add = Inst::AluRRR { alu_op, @@ -3487,7 +2684,9 @@ impl Inst { rn: reg, rm: tmp.to_reg(), }; - ret.push_str(&add.show_rru(mb_rru)); + ret.push_str( + &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } ret } @@ -3500,14 +2699,13 @@ impl Inst { &Inst::ElfTlsGetAddr { ref symbol } => { format!("elf_tls_get_addr {}", symbol) } - - &Inst::ValueLabelMarker { label, reg } => { - format!("value_label {:?}, {}", label, reg.show_rru(mb_rru)) - } - &Inst::Unwind { ref inst } => { format!("unwind {:?}", inst) } + &Inst::DummyUse { reg } 
=> {
+                let reg = pretty_print_reg(reg, allocs);
+                format!("dummy_use {}", reg)
+            }
         }
     }
 }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs
index 0b4babe04a..ad74d662b4 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs
@@ -3,11 +3,13 @@
 use crate::isa::aarch64::inst::OperandSize;
 use crate::isa::aarch64::inst::ScalarSize;
 use crate::isa::aarch64::inst::VectorSize;
+use crate::machinst::AllocationConsumer;
+use crate::machinst::RealReg;
+use crate::machinst::{Reg, RegClass, Writable};
 use crate::settings;
-
-use regalloc::{
-    PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES,
-};
+use regalloc2::MachineEnv;
+use regalloc2::PReg;
+use regalloc2::VReg;
 
 use std::string::{String, ToString};
 
@@ -19,40 +21,12 @@ use std::string::{String, ToString};
 /// https://searchfox.org/mozilla-central/source/js/src/jit/arm64/Assembler-arm64.h#103
 pub const PINNED_REG: u8 = 21;
 
-#[rustfmt::skip]
-const XREG_INDICES: [u8; 31] = [
-    // X0 - X7
-    32, 33, 34, 35, 36, 37, 38, 39,
-    // X8 - X15
-    40, 41, 42, 43, 44, 45, 46, 47,
-    // X16, X17
-    58, 59,
-    // X18
-    60,
-    // X19, X20
-    48, 49,
-    // X21, put aside because it's the pinned register.
-    57,
-    // X22 - X28
-    50, 51, 52, 53, 54, 55, 56,
-    // X29 (FP)
-    61,
-    // X30 (LR)
-    62,
-];
-
-const ZERO_REG_INDEX: u8 = 63;
-
-const SP_REG_INDEX: u8 = 64;
-
-/// Get a reference to an X-register (integer register).
+/// Get a reference to an X-register (integer register). Do not use
+/// this for xsp / xzr; we have two special registers for those.
 pub fn xreg(num: u8) -> Reg {
     assert!(num < 31);
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ num,
-        /* index = */ XREG_INDICES[num as usize],
-    )
+    let preg = PReg::new(num as usize, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }
 
 /// Get a writable reference to an X-register.
@@ -63,7 +37,8 @@ pub fn writable_xreg(num: u8) -> Writable<Reg> {
 /// Get a reference to a V-register (vector/FP register).
 pub fn vreg(num: u8) -> Reg {
     assert!(num < 32);
-    Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+    let preg = PReg::new(num as usize, RegClass::Float);
+    Reg::from(VReg::new(preg.index(), RegClass::Float))
 }
 
 /// Get a writable reference to a V-register.
@@ -73,13 +48,8 @@ pub fn writable_vreg(num: u8) -> Writable<Reg> {
 /// Get a reference to the zero-register.
 pub fn zero_reg() -> Reg {
-    // This should be the same as what xreg(31) returns, except that
-    // we use the special index into the register index space.
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ 31,
-        /* index = */ ZERO_REG_INDEX,
-    )
+    let preg = PReg::new(31, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }
 
 /// Get a writable reference to the zero-register (this discards a result).
@@ -89,16 +59,19 @@ pub fn writable_zero_reg() -> Writable<Reg> {
 /// Get a reference to the stack-pointer register.
 pub fn stack_reg() -> Reg {
-    // XSP (stack) and XZR (zero) are logically different registers which have
-    // the same hardware encoding, and whose meaning, in real aarch64
-    // instructions, is context-dependent. For convenience of
-    // universe-construction and for correct printing, we make them be two
-    // different real registers.
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ 31,
-        /* index = */ SP_REG_INDEX,
-    )
+    // XSP (stack) and XZR (zero) are logically different registers
+    // which have the same hardware encoding, and whose meaning, in
+    // real aarch64 instructions, is context-dependent. For extra
+    // correctness assurances and for correct printing, we make them
+    // be two different real registers from a regalloc perspective.
+    //
+    // We represent XZR as if it were xreg(31); XSP is xreg(31 +
+    // 32). The PReg bit-packing allows 6 bits (64 registers) so we
+    // make use of this extra space to distinguish xzr and xsp. We
+    // mask off the 6th bit (hw_enc & 31) to get the actual hardware
+    // register encoding.
+    let preg = PReg::new(31 + 32, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }
 
 /// Get a writable reference to the stack-pointer register.
@@ -159,158 +132,193 @@ pub fn writable_tmp2_reg() -> Writable<Reg> {
 }
 
 /// Create the register universe for AArch64.
-pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
-    let mut regs = vec![];
-    let mut allocable_by_class = [None; NUM_REG_CLASSES];
-
-    // Numbering Scheme: we put V-regs first, then X-regs. The X-regs exclude several registers:
-    // x18 (globally reserved for platform-specific purposes), x29 (frame pointer), x30 (link
-    // register), x31 (stack pointer or zero register, depending on context).
-
-    let v_reg_base = 0u8; // in contiguous real-register index space
-    let v_reg_count = 32;
-    for i in 0u8..v_reg_count {
-        let reg = Reg::new_real(
-            RegClass::V128,
-            /* enc = */ i,
-            /* index = */ v_reg_base + i,
-        )
-        .to_real_reg();
-        let name = format!("v{}", i);
-        regs.push((reg, name));
+pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
+    fn preg(r: Reg) -> PReg {
+        r.to_real_reg().unwrap().into()
     }
-    let v_reg_last = v_reg_base + v_reg_count - 1;
-
-    // Add the X registers. N.B.: the order here must match the order implied
-    // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
-
-    let x_reg_base = 32u8; // in contiguous real-register index space
-    let mut x_reg_count = 0;
-
-    let uses_pinned_reg = flags.enable_pinned_reg();
-
-    for i in 0u8..32u8 {
-        // See above for excluded registers.
-        if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
-            continue;
-        }
-        let reg = Reg::new_real(
-            RegClass::I64,
-            /* enc = */ i,
-            /* index = */ x_reg_base + x_reg_count,
-        )
-        .to_real_reg();
-        let name = format!("x{}", i);
-        regs.push((reg, name));
-        x_reg_count += 1;
-    }
-    let x_reg_last = x_reg_base + x_reg_count - 1;
-
-    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
-        first: x_reg_base as usize,
-        last: x_reg_last as usize,
-        suggested_scratch: Some(XREG_INDICES[19] as usize),
-    });
-    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
-        first: v_reg_base as usize,
-        last: v_reg_last as usize,
-        suggested_scratch: Some(/* V31: */ 31),
-    });
-
-    // Other regs, not available to the allocator.
-    let allocable = if uses_pinned_reg {
-        // The pinned register is not allocatable in this case, so record the length before adding
-        // it.
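// A quick sanity check of the xzr/xsp packing introduced above (hedged
// sketch; `hw_enc` is used the same way by `show_ireg` further down):
//
//     let xzr = zero_reg().to_real_reg().unwrap();
//     let xsp = stack_reg().to_real_reg().unwrap();
//     assert_eq!(xzr.hw_enc(), 31);
//     assert_eq!(xsp.hw_enc(), 63);                     // 31 + 32
//     assert_eq!(xsp.hw_enc() & 31, xzr.hw_enc() & 31); // same hardware encoding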
-        let len = regs.len();
-        regs.push((xreg(PINNED_REG).to_real_reg(), "x21/pinned_reg".to_string()));
-        len
-    } else {
-        regs.push((xreg(PINNED_REG).to_real_reg(), "x21".to_string()));
-        regs.len()
+    let mut env = MachineEnv {
+        preferred_regs_by_class: [
+            vec![
+                preg(xreg(0)),
+                preg(xreg(1)),
+                preg(xreg(2)),
+                preg(xreg(3)),
+                preg(xreg(4)),
+                preg(xreg(5)),
+                preg(xreg(6)),
+                preg(xreg(7)),
+                preg(xreg(8)),
+                preg(xreg(9)),
+                preg(xreg(10)),
+                preg(xreg(11)),
+                preg(xreg(12)),
+                preg(xreg(13)),
+                preg(xreg(14)),
+                preg(xreg(15)),
+                // x16 and x17 are spilltmp and tmp2 (see above).
+                // x19-28 are callee-saved and so not preferred.
+                // x21 is the pinned register (if enabled) and not allocatable if so.
+                // x29 is FP, x30 is LR, x31 is SP/ZR.
+            ],
+            vec![
+                preg(vreg(0)),
+                preg(vreg(1)),
+                preg(vreg(2)),
+                preg(vreg(3)),
+                preg(vreg(4)),
+                preg(vreg(5)),
+                preg(vreg(6)),
+                preg(vreg(7)),
+                preg(vreg(8)),
+                preg(vreg(9)),
+                preg(vreg(10)),
+                preg(vreg(11)),
+                preg(vreg(12)),
+                preg(vreg(13)),
+                preg(vreg(14)),
+                preg(vreg(15)),
+            ],
+        ],
+        non_preferred_regs_by_class: [
+            vec![
+                preg(xreg(19)),
+                preg(xreg(20)),
+                // x21 is pinned reg if enabled; we add to this list below if not.
+                preg(xreg(22)),
+                preg(xreg(23)),
+                preg(xreg(24)),
+                preg(xreg(25)),
+                preg(xreg(26)),
+                preg(xreg(27)),
+                preg(xreg(28)),
+            ],
+            vec![
+                preg(vreg(16)),
+                preg(vreg(17)),
+                preg(vreg(18)),
+                preg(vreg(19)),
+                preg(vreg(20)),
+                preg(vreg(21)),
+                preg(vreg(22)),
+                preg(vreg(23)),
+                preg(vreg(24)),
+                preg(vreg(25)),
+                preg(vreg(26)),
+                preg(vreg(27)),
+                preg(vreg(28)),
+                preg(vreg(29)),
+                preg(vreg(30)),
+                // v31 is the scratch reg, to allow for parallel moves.
+            ],
+        ],
+        scratch_by_class: [
+            // We use tmp2 (x17) as the regalloc scratch register,
+            // used to resolve cyclic parallel moves. This is valid
+            // because tmp2 is never live between regalloc-visible
+            // instructions, only within them (i.e. in expansion into
+            // multiple machine instructions when that
+            // occurs). spilltmp is used for moves to/from spillslots,
+            // but tmp2 never is, so it is available for this
+            // purpose. (Its only other use is in prologue stack
+            // checks, and the prologue is prepended after regalloc
+            // runs.)
+            preg(tmp2_reg()),
+            // We use v31 for Float/Vec-class parallel moves.
+            preg(vreg(31)),
+        ],
+        fixed_stack_slots: vec![],
     };
-    regs.push((xreg(16).to_real_reg(), "x16".to_string()));
-    regs.push((xreg(17).to_real_reg(), "x17".to_string()));
-    regs.push((xreg(18).to_real_reg(), "x18".to_string()));
-    regs.push((fp_reg().to_real_reg(), "fp".to_string()));
-    regs.push((link_reg().to_real_reg(), "lr".to_string()));
-    regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
-    regs.push((stack_reg().to_real_reg(), "sp".to_string()));
-
-    // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
-    // to 65, which is potentially inconvenient from a compiler performance
-    // standpoint. We could possibly drop back to 64 by "losing" a vector
-    // register in future.
-
-    // Assert sanity: the indices in the register structs must match their
-    // actual indices in the array.
-    for (i, reg) in regs.iter().enumerate() {
-        assert_eq!(i, reg.0.get_index());
+    if !flags.enable_pinned_reg() {
+        debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
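// The split above mirrors ABI cost: caller-saved x0..x15 / v0..v15 are
// "preferred" (free to clobber, no prologue save), callee-saved x19..x28 /
// v16..v30 are "non-preferred", and one scratch register per class is held
// back so regalloc2 can break cyclic parallel moves. A hedged spot-check
// (`preg` is the local helper defined at the top of `create_reg_env`):
//
//     let env = create_reg_env(&flags);
//     assert_eq!(env.preferred_regs_by_class[0].len(), 16);  // x0..x15
//     assert_eq!(env.scratch_by_class[0], preg(tmp2_reg())); // x17
//     assert_eq!(env.scratch_by_class[1], preg(vreg(31)));   // v31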
+        env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
     }
 
-    RealRegUniverse {
-        regs,
-        allocable,
-        allocable_by_class,
+    env
 }
 
+// PrettyPrint cannot be implemented for Reg; we need to invoke
+// backend-specific functions from higher level (inst, arg, ...)
+// types.
+
+fn show_ireg(reg: RealReg) -> String {
+    match reg.hw_enc() {
+        29 => "fp".to_string(),
+        30 => "lr".to_string(),
+        31 => "xzr".to_string(),
+        63 => "sp".to_string(),
+        x => {
+            debug_assert!(x < 29);
+            format!("x{}", x)
+        }
+    }
+}
+
-/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
+fn show_vreg(reg: RealReg) -> String {
+    format!("v{}", reg.hw_enc() & 31)
+}
+
+fn show_reg(reg: Reg) -> String {
+    if let Some(rreg) = reg.to_real_reg() {
+        match rreg.class() {
+            RegClass::Int => show_ireg(rreg),
+            RegClass::Float => show_vreg(rreg),
+        }
+    } else {
+        format!("%{:?}", reg)
+    }
+}
+
+pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String {
+    let reg = allocs.next(reg);
+    show_reg(reg)
+}
+
+/// If `ireg` denotes an Int-classed reg, make a best-effort attempt to show
 /// its name at the 32-bit size.
-pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
-    let mut s = reg.show_rru(mb_rru);
-    if reg.get_class() != RegClass::I64 || !size.is32() {
+pub fn show_ireg_sized(reg: Reg, size: OperandSize) -> String {
+    let mut s = show_reg(reg);
+    if reg.class() != RegClass::Int || !size.is32() {
         // We can't do any better.
         return s;
     }
 
-    if reg.is_real() {
-        // Change (eg) "x42" into "w42" as appropriate
-        if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
-            s = "w".to_string() + &s[1..];
-        }
-    } else {
-        // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role
-        if reg.get_class() == RegClass::I64 && size.is32() {
-            s.push('w');
-        }
+    // Change (eg) "x42" into "w42" as appropriate
+    if reg.class() == RegClass::Int && size.is32() && s.starts_with("x") {
+        s = "w".to_string() + &s[1..];
     }
+
     s
 }
 
 /// Show a vector register used in a scalar context.
-pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
-    let mut s = reg.show_rru(mb_rru);
-    if reg.get_class() != RegClass::V128 {
+pub fn show_vreg_scalar(reg: Reg, size: ScalarSize) -> String {
+    let mut s = show_reg(reg);
+    if reg.class() != RegClass::Float {
         // We can't do any better.
         return s;
     }
 
-    if reg.is_real() {
-        // Change (eg) "v0" into "d0".
-        if s.starts_with("v") {
-            let replacement = match size {
-                ScalarSize::Size8 => "b",
-                ScalarSize::Size16 => "h",
-                ScalarSize::Size32 => "s",
-                ScalarSize::Size64 => "d",
-                ScalarSize::Size128 => "q",
-            };
-            s.replace_range(0..1, replacement);
-        }
-    } else {
-        // Add a "d" suffix to RegClass::V128 vregs.
-        if reg.get_class() == RegClass::V128 {
-            s.push('d');
-        }
+    // Change (eg) "v0" into "d0".
+    if s.starts_with("v") {
+        let replacement = match size {
+            ScalarSize::Size8 => "b",
+            ScalarSize::Size16 => "h",
+            ScalarSize::Size32 => "s",
+            ScalarSize::Size64 => "d",
+            ScalarSize::Size128 => "q",
+        };
+        s.replace_range(0..1, replacement);
     }
+
     s
 }
 
 /// Show a vector register.
-pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
-    assert_eq!(RegClass::V128, reg.get_class());
-    let mut s = reg.show_rru(mb_rru);
+pub fn show_vreg_vector(reg: Reg, size: VectorSize) -> String {
+    assert_eq!(RegClass::Float, reg.class());
+    let mut s = show_reg(reg);
 
     let suffix = match size {
         VectorSize::Size8x8 => ".8b",
@@ -327,25 +335,54 @@ pub fn show_vreg_vector(reg: Reg, size: VectorSize) -> String {
 }
 
 /// Show an indexed vector element.
-pub fn show_vreg_element(
-    reg: Reg,
-    mb_rru: Option<&RealRegUniverse>,
-    idx: u8,
-    size: VectorSize,
-) -> String {
-    assert_eq!(RegClass::V128, reg.get_class());
-    let mut s = reg.show_rru(mb_rru);
-
+pub fn show_vreg_element(reg: Reg, idx: u8, size: VectorSize) -> String {
+    assert_eq!(RegClass::Float, reg.class());
+    let s = show_reg(reg);
     let suffix = match size {
-        VectorSize::Size8x8 => "b",
-        VectorSize::Size8x16 => "b",
-        VectorSize::Size16x4 => "h",
-        VectorSize::Size16x8 => "h",
-        VectorSize::Size32x2 => "s",
-        VectorSize::Size32x4 => "s",
-        VectorSize::Size64x2 => "d",
+        VectorSize::Size8x8 => ".b",
+        VectorSize::Size8x16 => ".b",
+        VectorSize::Size16x4 => ".h",
+        VectorSize::Size16x8 => ".h",
+        VectorSize::Size32x2 => ".s",
+        VectorSize::Size32x4 => ".s",
+        VectorSize::Size64x2 => ".d",
     };
-
-    s.push_str(&format!(".{}[{}]", suffix, idx));
-    s
+    format!("{}{}[{}]", s, suffix, idx)
+}
+
+pub fn pretty_print_ireg(
+    reg: Reg,
+    size: OperandSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_ireg_sized(reg, size)
+}
+
+pub fn pretty_print_vreg_scalar(
+    reg: Reg,
+    size: ScalarSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_scalar(reg, size)
+}
+
+pub fn pretty_print_vreg_vector(
+    reg: Reg,
+    size: VectorSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_vector(reg, size)
+}
+
+pub fn pretty_print_vreg_element(
+    reg: Reg,
+    idx: usize,
+    size: VectorSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_element(reg, idx as u8, size)
 }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
index 12651427b6..77f65862a3 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
@@ -2,8 +2,8 @@
 use crate::isa::aarch64::inst::regs;
 use crate::isa::unwind::systemv::RegisterMappingError;
+use crate::machinst::{Reg, RegClass};
 use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
-use regalloc::{Reg, RegClass};
 
 /// Creates a new aarch64 common information entry (CIE).
 pub fn create_cie() -> CommonInformationEntry {
@@ -17,11 +17,11 @@ pub fn create_cie() -> CommonInformationEntry {
         },
         4,  // Code alignment factor
         -8, // Data alignment factor
-        Register(regs::link_reg().get_hw_encoding().into()),
+        Register(regs::link_reg().to_real_reg().unwrap().hw_enc().into()),
     );
 
     // Every frame will start with the call frame address (CFA) at SP
-    let sp = Register(regs::stack_reg().get_hw_encoding().into());
+    let sp = Register((regs::stack_reg().to_real_reg().unwrap().hw_enc() & 31).into());
     entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
 
     entry
@@ -34,16 +34,15 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
     // https://developer.arm.com/documentation/ihi0057/e/?lang=en#dwarf-register-names
     //
    // X0--X31 is 0--31; V0--V31 is 64--95.
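// Given that numbering, a hedged spot-check of `map_reg` (gimli's `Register`
// is a thin wrapper around the DWARF register number):
//
//     assert_eq!(map_reg(regs::xreg(0)).unwrap().0, 0);      // X0 -> 0
//     assert_eq!(map_reg(regs::stack_reg()).unwrap().0, 31); // SP: hw_enc & 31
//     assert_eq!(map_reg(regs::vreg(0)).unwrap().0, 64);     // V0 -> 64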
-    match reg.get_class() {
-        RegClass::I64 => {
-            let reg = reg.get_hw_encoding() as u16;
+    match reg.class() {
+        RegClass::Int => {
+            let reg = (reg.to_real_reg().unwrap().hw_enc() & 31) as u16;
             Ok(Register(reg))
         }
-        RegClass::V128 => {
-            let reg = reg.get_hw_encoding() as u16;
+        RegClass::Float => {
+            let reg = reg.to_real_reg().unwrap().hw_enc() as u16;
             Ok(Register(64 + reg))
         }
-        _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
     }
 }
@@ -54,13 +53,13 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
         Ok(map_reg(reg)?.0)
     }
     fn sp(&self) -> u16 {
-        regs::stack_reg().get_hw_encoding().into()
+        (regs::stack_reg().to_real_reg().unwrap().hw_enc() & 31).into()
     }
     fn fp(&self) -> Option<u16> {
-        Some(regs::fp_reg().get_hw_encoding().into())
+        Some(regs::fp_reg().to_real_reg().unwrap().hw_enc().into())
     }
     fn lr(&self) -> Option<u16> {
-        Some(regs::link_reg().get_hw_encoding().into())
+        Some(regs::link_reg().to_real_reg().unwrap().hw_enc().into())
     }
     fn lr_offset(&self) -> Option<u32> {
         Some(8)
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 6ff8e0dedf..c84ed6afb5 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -7,21 +7,18 @@
 //!
 //! - Floating-point immediates (FIMM instruction).
 
+use super::lower_inst;
+use crate::data_value::DataValue;
 use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::{Opcode, Type, Value};
-use crate::machinst::lower::*;
-use crate::machinst::*;
-use crate::{CodegenError, CodegenResult};
-
 use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::AArch64Backend;
-
-use super::lower_inst;
-
-use crate::data_value::DataValue;
-use regalloc::{Reg, Writable};
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::machinst::{Reg, Writable};
+use crate::{CodegenError, CodegenResult};
 use smallvec::SmallVec;
 use std::cmp;
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index 73e5b6d4b7..da397aa660 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -17,9 +17,8 @@ use crate::{
     binemit::CodeOffset,
     ir::{
         immediates::*, types::*, ExternalName, Inst, InstructionData, MemFlags, TrapCode, Value,
-        ValueLabel, ValueList,
+        ValueList,
     },
-    isa::aarch64::inst::aarch64_map_regs,
     isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
     isa::unwind::UnwindInst,
     machinst::{ty_bits, InsnOutput, LowerCtx},
@@ -45,15 +44,9 @@ pub(crate) fn lower<C>(
 where
     C: LowerCtx<I = MInst>,
 {
-    lower_common(
-        lower_ctx,
-        flags,
-        isa_flags,
-        outputs,
-        inst,
-        |cx, insn| generated_code::constructor_lower(cx, insn),
-        aarch64_map_regs,
-    )
+    lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| {
+        generated_code::constructor_lower(cx, insn)
+    })
 }
 
 pub struct ExtendedValue {
@@ -200,11 +193,7 @@ where
     }
 
     fn emit(&mut self, inst: &MInst) -> Unit {
-        self.emitted_insts.push((inst.clone(), false));
-    }
-
-    fn emit_safepoint(&mut self, inst: &MInst) -> Unit {
-        self.emitted_insts.push((inst.clone(), true));
+        self.lower_ctx.emit(inst.clone());
     }
 
     fn cond_br_zero(&mut self, reg: Reg) -> CondBrKind {
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
index 08ec6024da..0d989e229c 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++
b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 443b34b797fc8ace -src/prelude.isle c0751050a11e2686 -src/isa/aarch64/inst.isle 19ccefb6a496d392 +src/prelude.isle afd037c4d91c875c +src/isa/aarch64/inst.isle 544b7126192140d5 src/isa/aarch64/lower.isle d88b62dd6b40622 diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs index a73e90405b..684aa0be22 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs @@ -79,7 +79,6 @@ pub trait Context { fn def_inst(&mut self, arg0: Value) -> Option; fn offset32_to_u32(&mut self, arg0: Offset32) -> u32; fn emit(&mut self, arg0: &MInst) -> Unit; - fn emit_safepoint(&mut self, arg0: &MInst) -> Unit; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode; @@ -132,13 +131,13 @@ pub trait Context { fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 402. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 427. +/// Internal type ProducesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -146,7 +145,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 438. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 429. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -681,7 +680,9 @@ pub enum MInst { CallInd { info: BoxCallIndInfo, }, - Ret, + Ret { + rets: VecReg, + }, EpiloguePlaceholder, Jump { dest: BranchTarget, @@ -737,16 +738,15 @@ pub enum MInst { ElfTlsGetAddr { symbol: ExternalName, }, - ValueLabelMarker { - reg: Reg, - label: ValueLabel, - }, Unwind { inst: UnwindInst, }, + DummyUse { + reg: Reg, + }, } -/// Internal type ALUOp: defined at src/isa/aarch64/inst.isle line 796. +/// Internal type ALUOp: defined at src/isa/aarch64/inst.isle line 795. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ALUOp { Add, @@ -774,7 +774,7 @@ pub enum ALUOp { SbcS, } -/// Internal type ALUOp3: defined at src/isa/aarch64/inst.isle line 834. +/// Internal type ALUOp3: defined at src/isa/aarch64/inst.isle line 833. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ALUOp3 { MAdd32, @@ -783,7 +783,7 @@ pub enum ALUOp3 { MSub64, } -/// Internal type BitOp: defined at src/isa/aarch64/inst.isle line 877. +/// Internal type BitOp: defined at src/isa/aarch64/inst.isle line 876. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum BitOp { RBit, @@ -791,7 +791,7 @@ pub enum BitOp { Cls, } -/// Internal type FPUOp1: defined at src/isa/aarch64/inst.isle line 944. +/// Internal type FPUOp1: defined at src/isa/aarch64/inst.isle line 943. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FPUOp1 { Abs32, @@ -804,7 +804,7 @@ pub enum FPUOp1 { Cvt64To32, } -/// Internal type FPUOp2: defined at src/isa/aarch64/inst.isle line 957. +/// Internal type FPUOp2: defined at src/isa/aarch64/inst.isle line 956. 
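// Unlike regalloc.rs, regalloc2 has no implicit notion of ABI live-outs, so
// `Ret` above now carries its return-value registers explicitly (`rets`) as
// uses that keep those values alive up to the return. A hedged sketch of the
// matching operand collection:
//
//     &Inst::Ret { ref rets } => {
//         for &ret in rets {
//             collector.reg_use(ret); // already pinned to its ABI return reg
//         }
//     }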
#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FPUOp2 { Add32, @@ -825,14 +825,14 @@ pub enum FPUOp2 { Uqsub64, } -/// Internal type FPUOp3: defined at src/isa/aarch64/inst.isle line 982. +/// Internal type FPUOp3: defined at src/isa/aarch64/inst.isle line 981. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FPUOp3 { MAdd32, MAdd64, } -/// Internal type FpuToIntOp: defined at src/isa/aarch64/inst.isle line 989. +/// Internal type FpuToIntOp: defined at src/isa/aarch64/inst.isle line 988. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FpuToIntOp { F32ToU32, @@ -845,7 +845,7 @@ pub enum FpuToIntOp { F64ToI64, } -/// Internal type IntToFpuOp: defined at src/isa/aarch64/inst.isle line 1002. +/// Internal type IntToFpuOp: defined at src/isa/aarch64/inst.isle line 1001. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum IntToFpuOp { U32ToF32, @@ -858,7 +858,7 @@ pub enum IntToFpuOp { I64ToF64, } -/// Internal type FpuRoundMode: defined at src/isa/aarch64/inst.isle line 1016. +/// Internal type FpuRoundMode: defined at src/isa/aarch64/inst.isle line 1015. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FpuRoundMode { Minus32, @@ -871,7 +871,7 @@ pub enum FpuRoundMode { Nearest64, } -/// Internal type VecExtendOp: defined at src/isa/aarch64/inst.isle line 1029. +/// Internal type VecExtendOp: defined at src/isa/aarch64/inst.isle line 1028. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecExtendOp { Sxtl8, @@ -882,7 +882,7 @@ pub enum VecExtendOp { Uxtl32, } -/// Internal type VecALUOp: defined at src/isa/aarch64/inst.isle line 1046. +/// Internal type VecALUOp: defined at src/isa/aarch64/inst.isle line 1045. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecALUOp { Sqadd, @@ -924,7 +924,7 @@ pub enum VecALUOp { Sqrdmulh, } -/// Internal type VecMisc2: defined at src/isa/aarch64/inst.isle line 1125. +/// Internal type VecMisc2: defined at src/isa/aarch64/inst.isle line 1124. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecMisc2 { Not, @@ -955,7 +955,7 @@ pub enum VecMisc2 { Fcmlt0, } -/// Internal type VecRRLongOp: defined at src/isa/aarch64/inst.isle line 1182. +/// Internal type VecRRLongOp: defined at src/isa/aarch64/inst.isle line 1181. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRLongOp { Fcvtl16, @@ -965,7 +965,7 @@ pub enum VecRRLongOp { Shll32, } -/// Internal type VecRRNarrowOp: defined at src/isa/aarch64/inst.isle line 1197. +/// Internal type VecRRNarrowOp: defined at src/isa/aarch64/inst.isle line 1196. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRNarrowOp { Xtn16, @@ -984,7 +984,7 @@ pub enum VecRRNarrowOp { Fcvtn64, } -/// Internal type VecRRRLongOp: defined at src/isa/aarch64/inst.isle line 1229. +/// Internal type VecRRRLongOp: defined at src/isa/aarch64/inst.isle line 1228. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRRLongOp { Smull8, @@ -998,13 +998,13 @@ pub enum VecRRRLongOp { Umlal32, } -/// Internal type VecPairOp: defined at src/isa/aarch64/inst.isle line 1246. +/// Internal type VecPairOp: defined at src/isa/aarch64/inst.isle line 1245. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecPairOp { Addp, } -/// Internal type VecRRPairLongOp: defined at src/isa/aarch64/inst.isle line 1254. +/// Internal type VecRRPairLongOp: defined at src/isa/aarch64/inst.isle line 1253. 
#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRPairLongOp { Saddlp8, @@ -1013,14 +1013,14 @@ pub enum VecRRPairLongOp { Uaddlp16, } -/// Internal type VecLanesOp: defined at src/isa/aarch64/inst.isle line 1265. +/// Internal type VecLanesOp: defined at src/isa/aarch64/inst.isle line 1264. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecLanesOp { Addv, Uminv, } -/// Internal type VecShiftImmOp: defined at src/isa/aarch64/inst.isle line 1274. +/// Internal type VecShiftImmOp: defined at src/isa/aarch64/inst.isle line 1273. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecShiftImmOp { Shl, @@ -1028,7 +1028,7 @@ pub enum VecShiftImmOp { Sshr, } -/// Internal type AtomicRMWOp: defined at src/isa/aarch64/inst.isle line 1285. +/// Internal type AtomicRMWOp: defined at src/isa/aarch64/inst.isle line 1284. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum AtomicRMWOp { Add, @@ -1088,7 +1088,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 410. + // Rule at src/prelude.isle line 407. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -1096,24 +1096,6 @@ pub fn constructor_side_effect( return None; } -// Generated as internal constructor for term safepoint. -pub fn constructor_safepoint( - ctx: &mut C, - arg0: &SideEffectNoResult, -) -> Option { - let pattern0_0 = arg0; - if let &SideEffectNoResult::Inst { - inst: ref pattern1_0, - } = pattern0_0 - { - // Rule at src/prelude.isle line 416. - let expr0_0 = C::emit_safepoint(ctx, pattern1_0); - let expr1_0 = C::output_none(ctx); - return Some(expr1_0); - } - return None; -} - // Generated as internal constructor for term produces_flags_get_reg. pub fn constructor_produces_flags_get_reg( ctx: &mut C, @@ -1125,7 +1107,7 @@ pub fn constructor_produces_flags_get_reg( result: pattern1_1, } = pattern0_0 { - // Rule at src/prelude.isle line 454. + // Rule at src/prelude.isle line 445. return Some(pattern1_1); } return None; @@ -1142,7 +1124,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 459. + // Rule at src/prelude.isle line 450. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1152,7 +1134,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 461. + // Rule at src/prelude.isle line 452. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1181,7 +1163,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 468. + // Rule at src/prelude.isle line 459. let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -1211,7 +1193,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 493. + // Rule at src/prelude.isle line 484. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -1222,7 +1204,7 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 499. + // Rule at src/prelude.isle line 490. 
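// `with_flags` above always emits the flags producer and its consumer(s)
// back to back; since NZCV is not modeled as a regalloc2 operand, keeping the
// pair adjacent is what guarantees that no allocator-inserted move can
// clobber the flags in between. In sketch form:
//
//     C::emit(ctx, &producer); // e.g. AddS: sets NZCV
//     C::emit(ctx, &consumer); // e.g. Adc: reads NZCV immediately after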
let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1235,7 +1217,7 @@ pub fn constructor_with_flags( inst4: ref pattern3_3, result: pattern3_4, } => { - // Rule at src/prelude.isle line 511. + // Rule at src/prelude.isle line 502. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1256,7 +1238,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 487. + // Rule at src/prelude.isle line 478. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1276,7 +1258,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 528. + // Rule at src/prelude.isle line 519. let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -1287,12 +1269,12 @@ pub fn constructor_with_flags_reg( pub fn constructor_operand_size(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) { - // Rule at src/isa/aarch64/inst.isle line 895. + // Rule at src/isa/aarch64/inst.isle line 894. let expr0_0 = OperandSize::Size32; return Some(expr0_0); } if let Some(pattern1_0) = C::fits_in_64(ctx, pattern0_0) { - // Rule at src/isa/aarch64/inst.isle line 896. + // Rule at src/isa/aarch64/inst.isle line 895. let expr0_0 = OperandSize::Size64; return Some(expr0_0); } @@ -1305,28 +1287,28 @@ pub fn constructor_vector_size(ctx: &mut C, arg0: Type) -> Option( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1381. + // Rule at src/isa/aarch64/inst.isle line 1380. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovZ { @@ -1364,7 +1346,7 @@ pub fn constructor_movn( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1388. + // Rule at src/isa/aarch64/inst.isle line 1387. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovN { @@ -1389,7 +1371,7 @@ pub fn constructor_alu_rr_imm_logic( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1395. + // Rule at src/isa/aarch64/inst.isle line 1394. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1417,7 +1399,7 @@ pub fn constructor_alu_rr_imm_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1402. + // Rule at src/isa/aarch64/inst.isle line 1401. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1445,7 +1427,7 @@ pub fn constructor_alu_rrr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1409. + // Rule at src/isa/aarch64/inst.isle line 1408. 
let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1473,7 +1455,7 @@ pub fn constructor_vec_rrr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1416. + // Rule at src/isa/aarch64/inst.isle line 1415. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRR { @@ -1498,7 +1480,7 @@ pub fn constructor_vec_lanes( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1423. + // Rule at src/isa/aarch64/inst.isle line 1422. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecLanes { @@ -1516,7 +1498,7 @@ pub fn constructor_vec_lanes( pub fn constructor_vec_dup(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1430. + // Rule at src/isa/aarch64/inst.isle line 1429. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecDup { @@ -1541,7 +1523,7 @@ pub fn constructor_alu_rr_imm12( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1437. + // Rule at src/isa/aarch64/inst.isle line 1436. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1571,7 +1553,7 @@ pub fn constructor_alu_rrr_shift( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/aarch64/inst.isle line 1444. + // Rule at src/isa/aarch64/inst.isle line 1443. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1602,7 +1584,7 @@ pub fn constructor_alu_rrr_extend( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/aarch64/inst.isle line 1451. + // Rule at src/isa/aarch64/inst.isle line 1450. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1631,7 +1613,7 @@ pub fn constructor_alu_rr_extend_reg( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1459. + // Rule at src/isa/aarch64/inst.isle line 1458. let expr0_0 = C::put_extended_in_reg(ctx, pattern3_0); let expr1_0 = C::get_extended_op(ctx, pattern3_0); let expr2_0 = @@ -1651,7 +1633,7 @@ pub fn constructor_alu_rrrr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1466. + // Rule at src/isa/aarch64/inst.isle line 1465. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::AluRRRR { @@ -1676,7 +1658,7 @@ pub fn constructor_bit_rr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1473. + // Rule at src/isa/aarch64/inst.isle line 1472. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1701,7 +1683,7 @@ pub fn constructor_add_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1480. + // Rule at src/isa/aarch64/inst.isle line 1479. 
let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::AddS; @@ -1731,7 +1713,7 @@ pub fn constructor_adc_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1488. + // Rule at src/isa/aarch64/inst.isle line 1487. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::Adc; @@ -1761,7 +1743,7 @@ pub fn constructor_sub_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1496. + // Rule at src/isa/aarch64/inst.isle line 1495. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::SubS; @@ -1789,7 +1771,7 @@ pub fn constructor_cmp64_imm( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1503. + // Rule at src/isa/aarch64/inst.isle line 1502. let expr0_0 = ALUOp::SubS; let expr1_0 = OperandSize::Size64; let expr2_0 = C::writable_zero_reg(ctx); @@ -1814,7 +1796,7 @@ pub fn constructor_sbc_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1510. + // Rule at src/isa/aarch64/inst.isle line 1509. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::Sbc; @@ -1844,7 +1826,7 @@ pub fn constructor_vec_misc( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1518. + // Rule at src/isa/aarch64/inst.isle line 1517. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecMisc { @@ -1870,7 +1852,7 @@ pub fn constructor_vec_rrr_long( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1525. + // Rule at src/isa/aarch64/inst.isle line 1524. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRRLong { @@ -1899,7 +1881,7 @@ pub fn constructor_vec_rrrr_long( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/aarch64/inst.isle line 1535. + // Rule at src/isa/aarch64/inst.isle line 1534. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::FpuMove128 { @@ -1929,7 +1911,7 @@ pub fn constructor_vec_rr_narrow( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1543. + // Rule at src/isa/aarch64/inst.isle line 1542. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRNarrow { @@ -1953,7 +1935,7 @@ pub fn constructor_vec_rr_long( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1550. + // Rule at src/isa/aarch64/inst.isle line 1549. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRLong { @@ -1975,7 +1957,7 @@ pub fn constructor_mov_to_fpu( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1557. + // Rule at src/isa/aarch64/inst.isle line 1556. 
let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovToFpu { @@ -2000,7 +1982,7 @@ pub fn constructor_mov_to_vec( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1564. + // Rule at src/isa/aarch64/inst.isle line 1563. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::FpuMove128 { @@ -2029,7 +2011,7 @@ pub fn constructor_mov_from_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1572. + // Rule at src/isa/aarch64/inst.isle line 1571. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovFromVec { @@ -2055,7 +2037,7 @@ pub fn constructor_mov_from_vec_signed( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1579. + // Rule at src/isa/aarch64/inst.isle line 1578. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovFromVecSigned { @@ -2082,7 +2064,7 @@ pub fn constructor_extend( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1586. + // Rule at src/isa/aarch64/inst.isle line 1585. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::Extend { @@ -2101,7 +2083,7 @@ pub fn constructor_extend( pub fn constructor_load_acquire(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1593. + // Rule at src/isa/aarch64/inst.isle line 1592. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::LoadAcquire { @@ -2124,7 +2106,7 @@ pub fn constructor_tst_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1603. + // Rule at src/isa/aarch64/inst.isle line 1602. let expr0_0 = ALUOp::AndS; let expr1_0 = constructor_operand_size(ctx, pattern0_0)?; let expr2_0 = C::writable_zero_reg(ctx); @@ -2149,7 +2131,7 @@ pub fn constructor_csel( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1617. + // Rule at src/isa/aarch64/inst.isle line 1616. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::CSel { @@ -2171,7 +2153,7 @@ pub fn constructor_add(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1626. + // Rule at src/isa/aarch64/inst.isle line 1625. let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2187,7 +2169,7 @@ pub fn constructor_add_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1629. + // Rule at src/isa/aarch64/inst.isle line 1628. let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rr_imm12(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2203,7 +2185,7 @@ pub fn constructor_add_extend( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1632. + // Rule at src/isa/aarch64/inst.isle line 1631. 
let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rr_extend_reg(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2221,7 +2203,7 @@ pub fn constructor_add_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1635. + // Rule at src/isa/aarch64/inst.isle line 1634. let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rrr_shift( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2239,7 +2221,7 @@ pub fn constructor_add_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1638. + // Rule at src/isa/aarch64/inst.isle line 1637. let expr0_0 = VecALUOp::Add; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2250,7 +2232,7 @@ pub fn constructor_sub(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1643. + // Rule at src/isa/aarch64/inst.isle line 1642. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2266,7 +2248,7 @@ pub fn constructor_sub_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1646. + // Rule at src/isa/aarch64/inst.isle line 1645. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rr_imm12(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2282,7 +2264,7 @@ pub fn constructor_sub_extend( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1649. + // Rule at src/isa/aarch64/inst.isle line 1648. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rr_extend_reg(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2300,7 +2282,7 @@ pub fn constructor_sub_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1652. + // Rule at src/isa/aarch64/inst.isle line 1651. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rrr_shift( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2318,7 +2300,7 @@ pub fn constructor_sub_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1655. + // Rule at src/isa/aarch64/inst.isle line 1654. let expr0_0 = VecALUOp::Sub; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2337,7 +2319,7 @@ pub fn constructor_madd( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1661. + // Rule at src/isa/aarch64/inst.isle line 1660. let expr0_0 = constructor_madd64(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -2345,7 +2327,7 @@ pub fn constructor_madd( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1660. + // Rule at src/isa/aarch64/inst.isle line 1659. let expr0_0 = constructor_madd32(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -2357,7 +2339,7 @@ pub fn constructor_madd32(ctx: &mut C, arg0: Reg, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1664. 
+ // Rule at src/isa/aarch64/inst.isle line 1663. let expr0_0 = ALUOp3::MAdd32; let expr1_0 = constructor_alu_rrrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2368,7 +2350,7 @@ pub fn constructor_madd64(ctx: &mut C, arg0: Reg, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1667. + // Rule at src/isa/aarch64/inst.isle line 1666. let expr0_0 = ALUOp3::MAdd64; let expr1_0 = constructor_alu_rrrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2379,7 +2361,7 @@ pub fn constructor_msub64(ctx: &mut C, arg0: Reg, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1672. + // Rule at src/isa/aarch64/inst.isle line 1671. let expr0_0 = ALUOp3::MSub64; let expr1_0 = constructor_alu_rrrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2395,7 +2377,7 @@ pub fn constructor_uqadd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1676. + // Rule at src/isa/aarch64/inst.isle line 1675. let expr0_0 = VecALUOp::Uqadd; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2411,7 +2393,7 @@ pub fn constructor_sqadd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1680. + // Rule at src/isa/aarch64/inst.isle line 1679. let expr0_0 = VecALUOp::Sqadd; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2427,7 +2409,7 @@ pub fn constructor_uqsub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1684. + // Rule at src/isa/aarch64/inst.isle line 1683. let expr0_0 = VecALUOp::Uqsub; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2443,7 +2425,7 @@ pub fn constructor_sqsub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1688. + // Rule at src/isa/aarch64/inst.isle line 1687. let expr0_0 = VecALUOp::Sqsub; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2454,7 +2436,7 @@ pub fn constructor_umulh(ctx: &mut C, arg0: Type, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1692. + // Rule at src/isa/aarch64/inst.isle line 1691. let expr0_0 = ALUOp::UMulH; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2465,7 +2447,7 @@ pub fn constructor_smulh(ctx: &mut C, arg0: Type, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1696. + // Rule at src/isa/aarch64/inst.isle line 1695. let expr0_0 = ALUOp::SMulH; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2481,7 +2463,7 @@ pub fn constructor_mul( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1700. + // Rule at src/isa/aarch64/inst.isle line 1699. 
let expr0_0 = VecALUOp::Mul; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2491,7 +2473,7 @@ pub fn constructor_mul( pub fn constructor_neg(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1704. + // Rule at src/isa/aarch64/inst.isle line 1703. let expr0_0 = VecMisc2::Neg; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2501,7 +2483,7 @@ pub fn constructor_neg(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> pub fn constructor_rev64(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1708. + // Rule at src/isa/aarch64/inst.isle line 1707. let expr0_0 = VecMisc2::Rev64; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2511,7 +2493,7 @@ pub fn constructor_rev64(ctx: &mut C, arg0: Reg, arg1: &VectorSize) pub fn constructor_xtn64(ctx: &mut C, arg0: Reg, arg1: bool) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1712. + // Rule at src/isa/aarch64/inst.isle line 1711. let expr0_0 = VecRRNarrowOp::Xtn64; let expr1_0 = constructor_vec_rr_narrow(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2527,7 +2509,7 @@ pub fn constructor_addp( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1716. + // Rule at src/isa/aarch64/inst.isle line 1715. let expr0_0 = VecALUOp::Addp; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2537,7 +2519,7 @@ pub fn constructor_addp( pub fn constructor_addv(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1720. + // Rule at src/isa/aarch64/inst.isle line 1719. let expr0_0 = VecLanesOp::Addv; let expr1_0 = constructor_vec_lanes(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2547,7 +2529,7 @@ pub fn constructor_addv(ctx: &mut C, arg0: Reg, arg1: &VectorSize) - pub fn constructor_shll32(ctx: &mut C, arg0: Reg, arg1: bool) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1724. + // Rule at src/isa/aarch64/inst.isle line 1723. let expr0_0 = VecRRLongOp::Shll32; let expr1_0 = constructor_vec_rr_long(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2565,7 +2547,7 @@ pub fn constructor_umlal32( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1728. + // Rule at src/isa/aarch64/inst.isle line 1727. let expr0_0 = VecRRRLongOp::Umlal32; let expr1_0 = constructor_vec_rrrr_long( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2583,7 +2565,7 @@ pub fn constructor_smull8( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1732. + // Rule at src/isa/aarch64/inst.isle line 1731. let expr0_0 = VecRRRLongOp::Smull8; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2599,7 +2581,7 @@ pub fn constructor_umull8( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1736. 
+ // Rule at src/isa/aarch64/inst.isle line 1735. let expr0_0 = VecRRRLongOp::Umull8; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2615,7 +2597,7 @@ pub fn constructor_smull16( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1740. + // Rule at src/isa/aarch64/inst.isle line 1739. let expr0_0 = VecRRRLongOp::Smull16; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2631,7 +2613,7 @@ pub fn constructor_umull16( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1744. + // Rule at src/isa/aarch64/inst.isle line 1743. let expr0_0 = VecRRRLongOp::Umull16; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2647,7 +2629,7 @@ pub fn constructor_smull32( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1748. + // Rule at src/isa/aarch64/inst.isle line 1747. let expr0_0 = VecRRRLongOp::Smull32; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2663,7 +2645,7 @@ pub fn constructor_umull32( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1752. + // Rule at src/isa/aarch64/inst.isle line 1751. let expr0_0 = VecRRRLongOp::Umull32; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2674,7 +2656,7 @@ pub fn constructor_asr(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1756. + // Rule at src/isa/aarch64/inst.isle line 1755. let expr0_0 = ALUOp::Asr; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2690,7 +2672,7 @@ pub fn constructor_asr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1759. + // Rule at src/isa/aarch64/inst.isle line 1758. let expr0_0 = ALUOp::Asr; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2701,7 +2683,7 @@ pub fn constructor_lsr(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1763. + // Rule at src/isa/aarch64/inst.isle line 1762. let expr0_0 = ALUOp::Lsr; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2717,7 +2699,7 @@ pub fn constructor_lsr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1766. + // Rule at src/isa/aarch64/inst.isle line 1765. let expr0_0 = ALUOp::Lsr; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2728,7 +2710,7 @@ pub fn constructor_lsl(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1770. + // Rule at src/isa/aarch64/inst.isle line 1769. 
let expr0_0 = ALUOp::Lsl; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2744,7 +2726,7 @@ pub fn constructor_lsl_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1773. + // Rule at src/isa/aarch64/inst.isle line 1772. let expr0_0 = ALUOp::Lsl; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2760,7 +2742,7 @@ pub fn constructor_a64_udiv( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1777. + // Rule at src/isa/aarch64/inst.isle line 1776. let expr0_0 = ALUOp::UDiv; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2776,7 +2758,7 @@ pub fn constructor_a64_sdiv( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1781. + // Rule at src/isa/aarch64/inst.isle line 1780. let expr0_0 = ALUOp::SDiv; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2786,7 +2768,7 @@ pub fn constructor_a64_sdiv( pub fn constructor_not(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1785. + // Rule at src/isa/aarch64/inst.isle line 1784. let expr0_0 = VecMisc2::Not; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2802,7 +2784,7 @@ pub fn constructor_orr_not( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1790. + // Rule at src/isa/aarch64/inst.isle line 1789. let expr0_0 = ALUOp::OrrNot; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2820,7 +2802,7 @@ pub fn constructor_orr_not_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1793. + // Rule at src/isa/aarch64/inst.isle line 1792. let expr0_0 = ALUOp::OrrNot; let expr1_0 = constructor_alu_rrr_shift( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2833,7 +2815,7 @@ pub fn constructor_orr(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1798. + // Rule at src/isa/aarch64/inst.isle line 1797. let expr0_0 = ALUOp::Orr; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2849,7 +2831,7 @@ pub fn constructor_orr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1801. + // Rule at src/isa/aarch64/inst.isle line 1800. let expr0_0 = ALUOp::Orr; let expr1_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2865,7 +2847,7 @@ pub fn constructor_orr_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1804. + // Rule at src/isa/aarch64/inst.isle line 1803. 
let expr0_0 = VecALUOp::Orr; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2881,7 +2863,7 @@ pub fn constructor_and_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1809. + // Rule at src/isa/aarch64/inst.isle line 1808. let expr0_0 = ALUOp::And; let expr1_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2897,7 +2879,7 @@ pub fn constructor_and_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1812. + // Rule at src/isa/aarch64/inst.isle line 1811. let expr0_0 = VecALUOp::And; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2913,7 +2895,7 @@ pub fn constructor_eor_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1816. + // Rule at src/isa/aarch64/inst.isle line 1815. let expr0_0 = VecALUOp::Eor; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2929,7 +2911,7 @@ pub fn constructor_bic_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1820. + // Rule at src/isa/aarch64/inst.isle line 1819. let expr0_0 = VecALUOp::Bic; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2945,7 +2927,7 @@ pub fn constructor_sshl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1824. + // Rule at src/isa/aarch64/inst.isle line 1823. let expr0_0 = VecALUOp::Sshl; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2961,7 +2943,7 @@ pub fn constructor_ushl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1828. + // Rule at src/isa/aarch64/inst.isle line 1827. let expr0_0 = VecALUOp::Ushl; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2977,7 +2959,7 @@ pub fn constructor_a64_rotr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1833. + // Rule at src/isa/aarch64/inst.isle line 1832. let expr0_0 = ALUOp::RotR; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2993,7 +2975,7 @@ pub fn constructor_a64_rotr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1836. + // Rule at src/isa/aarch64/inst.isle line 1835. let expr0_0 = ALUOp::RotR; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3003,7 +2985,7 @@ pub fn constructor_a64_rotr_imm( pub fn constructor_rbit(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1841. + // Rule at src/isa/aarch64/inst.isle line 1840. 
let expr0_0 = BitOp::RBit; let expr1_0 = constructor_bit_rr(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3013,7 +2995,7 @@ pub fn constructor_rbit(ctx: &mut C, arg0: Type, arg1: Reg) -> Optio pub fn constructor_a64_clz(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1846. + // Rule at src/isa/aarch64/inst.isle line 1845. let expr0_0 = BitOp::Clz; let expr1_0 = constructor_bit_rr(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3023,7 +3005,7 @@ pub fn constructor_a64_clz(ctx: &mut C, arg0: Type, arg1: Reg) -> Op pub fn constructor_a64_cls(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1851. + // Rule at src/isa/aarch64/inst.isle line 1850. let expr0_0 = BitOp::Cls; let expr1_0 = constructor_bit_rr(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3034,7 +3016,7 @@ pub fn constructor_eon(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1856. + // Rule at src/isa/aarch64/inst.isle line 1855. let expr0_0 = ALUOp::EorNot; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3044,7 +3026,7 @@ pub fn constructor_eon(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg pub fn constructor_vec_cnt(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1861. + // Rule at src/isa/aarch64/inst.isle line 1860. let expr0_0 = VecMisc2::Cnt; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3061,7 +3043,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option }; if let Some(pattern3_0) = closure3() { if let Some(pattern4_0) = C::imm_logic_from_u64(ctx, pattern2_0, pattern3_0) { - // Rule at src/isa/aarch64/inst.isle line 1876. + // Rule at src/isa/aarch64/inst.isle line 1875. let expr0_0: Type = I64; let expr1_0 = C::zero_reg(ctx); let expr2_0 = constructor_orr_imm(ctx, expr0_0, expr1_0, pattern4_0)?; @@ -3069,18 +3051,18 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option } } if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) { - // Rule at src/isa/aarch64/inst.isle line 1868. + // Rule at src/isa/aarch64/inst.isle line 1867. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?; return Some(expr1_0); } if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) { - // Rule at src/isa/aarch64/inst.isle line 1872. + // Rule at src/isa/aarch64/inst.isle line 1871. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?; return Some(expr1_0); } - // Rule at src/isa/aarch64/inst.isle line 1883. + // Rule at src/isa/aarch64/inst.isle line 1882. let expr0_0 = C::load_constant64_full(ctx, pattern2_0); return Some(expr0_0); } @@ -3092,17 +3074,17 @@ pub fn constructor_put_in_reg_sext32(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I32 { - // Rule at src/isa/aarch64/inst.isle line 1894. + // Rule at src/isa/aarch64/inst.isle line 1893. 
let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1895. + // Rule at src/isa/aarch64/inst.isle line 1894. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1890. + // Rule at src/isa/aarch64/inst.isle line 1889. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = true; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3118,17 +3100,17 @@ pub fn constructor_put_in_reg_zext32(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I32 { - // Rule at src/isa/aarch64/inst.isle line 1903. + // Rule at src/isa/aarch64/inst.isle line 1902. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1904. + // Rule at src/isa/aarch64/inst.isle line 1903. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1899. + // Rule at src/isa/aarch64/inst.isle line 1898. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = false; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3144,12 +3126,12 @@ pub fn constructor_put_in_reg_sext64(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1912. + // Rule at src/isa/aarch64/inst.isle line 1911. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1908. + // Rule at src/isa/aarch64/inst.isle line 1907. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = true; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3165,12 +3147,12 @@ pub fn constructor_put_in_reg_zext64(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1920. + // Rule at src/isa/aarch64/inst.isle line 1919. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1916. + // Rule at src/isa/aarch64/inst.isle line 1915. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = false; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3184,7 +3166,7 @@ pub fn constructor_put_in_reg_zext64(ctx: &mut C, arg0: Value) -> Op // Generated as internal constructor for term trap_if_zero_divisor. pub fn constructor_trap_if_zero_divisor(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/aarch64/inst.isle line 1925. + // Rule at src/isa/aarch64/inst.isle line 1924. let expr0_0 = C::cond_br_zero(ctx, pattern0_0); let expr1_0 = C::trap_code_division_by_zero(ctx); let expr2_0 = MInst::TrapIf { @@ -3199,12 +3181,12 @@ pub fn constructor_trap_if_zero_divisor(ctx: &mut C, arg0: Reg) -> O pub fn constructor_size_from_ty(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1931. + // Rule at src/isa/aarch64/inst.isle line 1930. 
let expr0_0 = OperandSize::Size64; return Some(expr0_0); } if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) { - // Rule at src/isa/aarch64/inst.isle line 1930. + // Rule at src/isa/aarch64/inst.isle line 1929. let expr0_0 = OperandSize::Size32; return Some(expr0_0); } @@ -3221,7 +3203,7 @@ pub fn constructor_trap_if_div_overflow( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1937. + // Rule at src/isa/aarch64/inst.isle line 1936. let expr0_0 = ALUOp::AddS; let expr1_0 = constructor_operand_size(ctx, pattern0_0)?; let expr2_0 = C::writable_zero_reg(ctx); @@ -3290,7 +3272,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( C::imm_logic_from_imm64(ctx, pattern5_1, pattern7_0) { let pattern9_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1982. + // Rule at src/isa/aarch64/inst.isle line 1981. let expr0_0 = C::put_in_reg(ctx, pattern9_0); let expr1_0 = constructor_alu_rr_imm_logic( ctx, pattern0_0, pattern1_0, expr0_0, pattern8_0, @@ -3322,7 +3304,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( C::lshl_from_imm64(ctx, pattern10_1, pattern12_0) { let pattern14_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1988. + // Rule at src/isa/aarch64/inst.isle line 1987. let expr0_0 = C::put_in_reg(ctx, pattern14_0); let expr1_0 = C::put_in_reg(ctx, pattern7_0); let expr2_0 = constructor_alu_rrr_shift( @@ -3360,7 +3342,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( if let Some(pattern9_0) = C::imm_logic_from_imm64(ctx, pattern6_1, pattern8_0) { - // Rule at src/isa/aarch64/inst.isle line 1980. + // Rule at src/isa/aarch64/inst.isle line 1979. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = constructor_alu_rr_imm_logic( ctx, pattern0_0, pattern1_0, expr0_0, pattern9_0, @@ -3391,7 +3373,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( if let Some(pattern14_0) = C::lshl_from_imm64(ctx, pattern11_1, pattern13_0) { - // Rule at src/isa/aarch64/inst.isle line 1986. + // Rule at src/isa/aarch64/inst.isle line 1985. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern8_0); let expr2_0 = constructor_alu_rrr_shift( @@ -3413,7 +3395,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( _ => {} } } - // Rule at src/isa/aarch64/inst.isle line 1976. + // Rule at src/isa/aarch64/inst.isle line 1975. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern3_0); let expr2_0 = constructor_alu_rrr(ctx, pattern0_0, pattern1_0, expr0_0, expr1_0)?; @@ -3447,7 +3429,7 @@ pub fn constructor_alu_rs_imm_logic( if let Some(pattern9_0) = C::imm_logic_from_imm64(ctx, pattern6_1, pattern8_0) { - // Rule at src/isa/aarch64/inst.isle line 1996. + // Rule at src/isa/aarch64/inst.isle line 1995. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = constructor_alu_rr_imm_logic( ctx, pattern0_0, pattern1_0, expr0_0, pattern9_0, @@ -3478,7 +3460,7 @@ pub fn constructor_alu_rs_imm_logic( if let Some(pattern14_0) = C::lshl_from_imm64(ctx, pattern11_1, pattern13_0) { - // Rule at src/isa/aarch64/inst.isle line 1998. + // Rule at src/isa/aarch64/inst.isle line 1997. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern8_0); let expr2_0 = constructor_alu_rrr_shift( @@ -3500,7 +3482,7 @@ pub fn constructor_alu_rs_imm_logic( _ => {} } } - // Rule at src/isa/aarch64/inst.isle line 1994. + // Rule at src/isa/aarch64/inst.isle line 1993. 
let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern3_0); let expr2_0 = constructor_alu_rrr(ctx, pattern0_0, pattern1_0, expr0_0, expr1_0)?; @@ -3519,7 +3501,7 @@ pub fn constructor_i128_alu_bitop( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 2006. + // Rule at src/isa/aarch64/inst.isle line 2005. let expr0_0 = C::put_in_regs(ctx, pattern2_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -3546,7 +3528,7 @@ pub fn constructor_float_cmp_zero( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2046. + // Rule at src/isa/aarch64/inst.isle line 2045. let expr0_0 = C::float_cc_cmp_zero_to_vec_misc_op(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3562,7 +3544,7 @@ pub fn constructor_float_cmp_zero_swap( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2051. + // Rule at src/isa/aarch64/inst.isle line 2050. let expr0_0 = C::float_cc_cmp_zero_to_vec_misc_op_swap(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3572,7 +3554,7 @@ pub fn constructor_float_cmp_zero_swap( pub fn constructor_fcmeq0(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 2056. + // Rule at src/isa/aarch64/inst.isle line 2055. let expr0_0 = VecMisc2::Fcmeq0; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3588,7 +3570,7 @@ pub fn constructor_int_cmp_zero( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2082. + // Rule at src/isa/aarch64/inst.isle line 2081. let expr0_0 = C::int_cc_cmp_zero_to_vec_misc_op(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3604,7 +3586,7 @@ pub fn constructor_int_cmp_zero_swap( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2087. + // Rule at src/isa/aarch64/inst.isle line 2086. let expr0_0 = C::int_cc_cmp_zero_to_vec_misc_op_swap(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3614,7 +3596,7 @@ pub fn constructor_int_cmp_zero_swap( pub fn constructor_cmeq0(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 2092. + // Rule at src/isa/aarch64/inst.isle line 2091. let expr0_0 = VecMisc2::Cmeq0; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 10fe714730..577b004d6a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1,27 +1,22 @@ //! Lower a single Cranelift instruction into vcode. 
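Note on the generated `prelude.isle` / `inst.isle` hunks above: none of the constructor logic changes; every hunk only shifts a `// Rule at ...` source-location comment (by nine lines for `src/prelude.isle`, by one line for `src/isa/aarch64/inst.isle`), reflecting rules deleted earlier in those source files. For orientation, the sketch below shows the shape all of these generated constructors share. It is illustrative only: `constructor_alu_rrr` and its call shape are taken from the hunks above, while the `Context` bound, the rule, and the line number are assumptions (this diff's extraction stripped most generic parameters).

// Hypothetical sketch of an ISLE-generated constructor; not part of the diff.
// Each rule body builds a value through helper constructors and chains with
// `?`, so a helper returning `None` falls through to the next matching rule.
pub fn constructor_example_add<C: Context>(ctx: &mut C, ty: Type, rn: Reg, rm: Reg) -> Option<Reg> {
    // Rule at src/isa/aarch64/inst.isle line NNN (hypothetical).
    let op = ALUOp::Add;
    let dst = constructor_alu_rrr(ctx, &op, ty, rn, rm)?;
    Some(dst)
}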
+use super::lower::*; use crate::binemit::CodeOffset; use crate::ir::condcodes::FloatCC; use crate::ir::types::*; use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode}; +use crate::isa::aarch64::abi::*; +use crate::isa::aarch64::inst::*; use crate::isa::aarch64::settings as aarch64_settings; use crate::machinst::lower::*; use crate::machinst::*; use crate::settings::{Flags, TlsModel}; use crate::{CodegenError, CodegenResult}; - -use crate::isa::aarch64::abi::*; -use crate::isa::aarch64::inst::*; - -use regalloc::Writable; - use alloc::boxed::Box; use alloc::vec::Vec; use core::convert::TryFrom; -use super::lower::*; - /// Actually codegen an instruction's results into registers. pub(crate) fn lower_insn_to_regs>( ctx: &mut C, @@ -766,7 +761,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trap | Opcode::ResumableTrap => { let trap_code = ctx.data(insn).trap_code().unwrap(); - ctx.emit_safepoint(Inst::Udf { trap_code }); + ctx.emit(Inst::Udf { trap_code }); } Opcode::Trapif | Opcode::Trapff => { @@ -797,7 +792,7 @@ pub(crate) fn lower_insn_to_regs>( cond }; - ctx.emit_safepoint(Inst::TrapIf { + ctx.emit(Inst::TrapIf { trap_code, kind: CondBrKind::Cond(cond), }); @@ -1507,35 +1502,34 @@ pub(crate) fn lower_insn_to_regs>( let lane_type = ty.lane_type(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let mut match_long_pair = - |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, regalloc::Reg)> { - if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) { - if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) { - let lhs_inputs = insn_inputs(ctx, lhs); - let rhs_inputs = insn_inputs(ctx, rhs); - let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None); - let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None); - if low == high { - match (lane_type, ext_low_op) { - (I16, Opcode::SwidenLow) => { - return Some((VecRRPairLongOp::Saddlp8, low)) - } - (I32, Opcode::SwidenLow) => { - return Some((VecRRPairLongOp::Saddlp16, low)) - } - (I16, Opcode::UwidenLow) => { - return Some((VecRRPairLongOp::Uaddlp8, low)) - } - (I32, Opcode::UwidenLow) => { - return Some((VecRRPairLongOp::Uaddlp16, low)) - } - _ => (), - }; - } + let mut match_long_pair = |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, Reg)> { + if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) { + if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) { + let lhs_inputs = insn_inputs(ctx, lhs); + let rhs_inputs = insn_inputs(ctx, rhs); + let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None); + let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None); + if low == high { + match (lane_type, ext_low_op) { + (I16, Opcode::SwidenLow) => { + return Some((VecRRPairLongOp::Saddlp8, low)) + } + (I32, Opcode::SwidenLow) => { + return Some((VecRRPairLongOp::Saddlp16, low)) + } + (I16, Opcode::UwidenLow) => { + return Some((VecRRPairLongOp::Uaddlp8, low)) + } + (I32, Opcode::UwidenLow) => { + return Some((VecRRPairLongOp::Uaddlp16, low)) + } + _ => (), + }; } } - None - }; + } + None + }; if let Some((op, rn)) = match_long_pair(Opcode::SwidenLow, Opcode::SwidenHigh) { ctx.emit(Inst::VecRRPairLong { op, rd, rn }); diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 2a1a4a42f3..ec1ab0b35e 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -11,7 +11,7 @@ use crate::result::CodegenResult; use 
crate::settings as shared_settings; use alloc::{boxed::Box, vec::Vec}; use core::fmt; -use regalloc::{PrettyPrint, RealRegUniverse}; +use regalloc2::MachineEnv; use target_lexicon::{Aarch64Architecture, Architecture, Triple}; // New backend: @@ -21,7 +21,7 @@ mod lower; mod lower_inst; mod settings; -use inst::create_reg_universe; +use inst::create_reg_env; use self::inst::EmitInfo; @@ -30,7 +30,7 @@ pub struct AArch64Backend { triple: Triple, flags: shared_settings::Flags, isa_flags: aarch64_settings::Flags, - reg_universe: RealRegUniverse, + machine_env: MachineEnv, } impl AArch64Backend { @@ -40,12 +40,12 @@ impl AArch64Backend { flags: shared_settings::Flags, isa_flags: aarch64_settings::Flags, ) -> AArch64Backend { - let reg_universe = create_reg_universe(&flags); + let machine_env = create_reg_env(&flags); AArch64Backend { triple, flags, isa_flags, - reg_universe, + machine_env, } } @@ -55,10 +55,10 @@ impl AArch64Backend { &self, func: &Function, flags: shared_settings::Flags, - ) -> CodegenResult> { + ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone()); let abi = Box::new(abi::AArch64ABICallee::new(func, flags, self.isa_flags())?); - compile::compile::(func, self, abi, &self.reg_universe, emit_info) + compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -69,28 +69,27 @@ impl TargetIsa for AArch64Backend { want_disasm: bool, ) -> CodegenResult { let flags = self.flags(); - let vcode = self.compile_vcode(func, flags.clone())?; + let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; - let (buffer, bb_starts, bb_edges) = vcode.emit(); - let frame_size = vcode.frame_size(); - let stackslot_offsets = vcode.stackslot_offsets().clone(); + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, flags.machine_code_cfg_info()); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer.finish(); + let stackslot_offsets = emit_result.stackslot_offsets; - let disasm = if want_disasm { - Some(vcode.show_rru(Some(&create_reg_universe(flags)))) - } else { - None - }; - - let buffer = buffer.finish(); + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } Ok(MachCompileResult { buffer, frame_size, - disasm, - value_labels_ranges: Default::default(), + disasm: emit_result.disasm, + value_labels_ranges, stackslot_offsets, - bb_starts, - bb_edges, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, }) } @@ -218,11 +217,11 @@ mod test { let buffer = backend.compile_function(&mut func, false).unwrap().buffer; let code = buffer.data(); - // mov x1, #0x1234 - // add w0, w0, w1 + // mov x3, #0x1234 + // add w0, w0, w3 // ret let golden = vec![ - 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x01, 0x0b, 0xc0, 0x03, 0x5f, 0xd6, + 0x83, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x03, 0x0b, 0xc0, 0x03, 0x5f, 0xd6, ]; assert_eq!(code, &golden[..]); @@ -273,23 +272,24 @@ mod test { .unwrap(); let code = result.buffer.data(); - // mov x1, #0x1234 // #4660 - // add w0, w0, w1 - // mov w1, w0 - // cbnz x1, 0x28 - // mov x1, #0x1234 // #4660 - // add w1, w0, w1 - // mov w1, w1 - // cbnz x1, 0x18 - // mov w1, w0 - // cbnz x1, 0x18 - // mov x1, #0x1234 // #4660 - // sub w0, w0, w1 + // mov x10, #0x1234 // #4660 + // add w12, w0, w10 + // mov w11, w12 + // cbnz x11, 0x20 + // mov x13, #0x1234 // #4660 + // add w15, w12, w13 + // mov w14, w15 + // 
cbnz x14, 0x10 + // mov w1, w12 + // cbnz x1, 0x10 + // mov x2, #0x1234 // #4660 + // sub w0, w12, w2 // ret + let golden = vec![ - 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161, 0, 0, 181, 129, 70, 130, 210, 1, 0, - 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3, 0, 42, 97, 255, 255, 181, 129, 70, - 130, 210, 0, 0, 1, 75, 192, 3, 95, 214, + 138, 70, 130, 210, 12, 0, 10, 11, 235, 3, 12, 42, 171, 0, 0, 181, 141, 70, 130, 210, + 143, 1, 13, 11, 238, 3, 15, 42, 174, 255, 255, 181, 225, 3, 12, 42, 97, 255, 255, 181, + 130, 70, 130, 210, 128, 1, 2, 75, 192, 3, 95, 214, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 821b9627ea..691c90e32b 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -235,7 +235,10 @@ pub trait TargetIsa: fmt::Display + Send + Sync { #[cfg(feature = "unwind")] /// Map a regalloc::Reg to its corresponding DWARF register. - fn map_regalloc_reg_to_dwarf(&self, _: ::regalloc::Reg) -> Result { + fn map_regalloc_reg_to_dwarf( + &self, + _: crate::machinst::Reg, + ) -> Result { Err(RegisterMappingError::UnsupportedArchitecture) } diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 1ab9a1f001..8038eb581c 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -66,11 +66,13 @@ use crate::isa; use crate::isa::s390x::inst::*; use crate::isa::unwind::UnwindInst; use crate::machinst::*; +use crate::machinst::{RealReg, Reg, RegClass, Writable}; use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use regalloc2::PReg; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; @@ -235,7 +237,7 @@ impl ABIMachineSpec for S390xMachineDeps { if let Some(reg) = candidate { ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), param.value_type, param.extension, param.purpose, @@ -279,7 +281,7 @@ impl ABIMachineSpec for S390xMachineDeps { debug_assert!(args_or_rets == ArgsOrRets::Args); if let Some(reg) = get_intreg_for_arg(next_gpr) { ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), types::I64, ir::ArgumentExtension::None, ir::ArgumentPurpose::Normal, @@ -340,8 +342,11 @@ impl ABIMachineSpec for S390xMachineDeps { } } - fn gen_ret() -> Inst { - Inst::Ret { link: gpr(14) } + fn gen_ret(rets: Vec) -> Inst { + Inst::Ret { + link: gpr(14), + rets, + } } fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallInstVec { @@ -462,7 +467,7 @@ impl ABIMachineSpec for S390xMachineDeps { _call_conv: isa::CallConv, _setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec>, + clobbered_callee_saves: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { @@ -471,16 +476,15 @@ impl ABIMachineSpec for S390xMachineDeps { let mut clobbered_gpr = vec![]; for ® in clobbered_callee_saves.iter() { - match reg.to_reg().get_class() { - RegClass::I64 => clobbered_gpr.push(reg), - RegClass::F64 => clobbered_fpr.push(reg), - class => panic!("Unexpected RegClass: {:?}", class), + match reg.to_reg().class() { + RegClass::Int => clobbered_gpr.push(reg), + RegClass::Float => clobbered_fpr.push(reg), } } let mut first_clobbered_gpr = 16; for reg in clobbered_gpr { - let enc = reg.to_reg().get_hw_encoding(); + let enc = reg.to_reg().hw_enc(); if enc < 
first_clobbered_gpr { first_clobbered_gpr = enc; } @@ -499,7 +503,7 @@ impl ABIMachineSpec for S390xMachineDeps { if first_clobbered_gpr < 16 { let offset = 8 * first_clobbered_gpr as i64; insts.push(Inst::StoreMultiple64 { - rt: gpr(first_clobbered_gpr as u8), + rt: gpr(first_clobbered_gpr), rt2: gpr(15), mem: MemArg::reg_plus_off(stack_reg(), offset, MemFlags::trusted()), }); @@ -509,7 +513,7 @@ impl ABIMachineSpec for S390xMachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: clobber_size as u32 + (i * 8) as u32, - reg: gpr(i as u8).to_real_reg(), + reg: gpr(i).to_real_reg().unwrap(), }, }); } @@ -535,7 +539,7 @@ impl ABIMachineSpec for S390xMachineDeps { // Save FPRs. for (i, reg) in clobbered_fpr.iter().enumerate() { insts.push(Inst::FpuStore64 { - rd: reg.to_reg().to_reg(), + rd: reg.to_reg().into(), mem: MemArg::reg_plus_off( stack_reg(), (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64, @@ -558,7 +562,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, _: &settings::Flags, - clobbers: &Set>, + clobbers: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { @@ -568,7 +572,7 @@ impl ABIMachineSpec for S390xMachineDeps { let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers); let mut first_clobbered_gpr = 16; for reg in clobbered_gpr { - let enc = reg.to_reg().get_hw_encoding(); + let enc = reg.to_reg().hw_enc(); if enc < first_clobbered_gpr { first_clobbered_gpr = enc; } @@ -578,7 +582,7 @@ impl ABIMachineSpec for S390xMachineDeps { // Restore FPRs. for (i, reg) in clobbered_fpr.iter().enumerate() { insts.push(Inst::FpuLoad64 { - rd: Writable::from_reg(reg.to_reg().to_reg()), + rd: Writable::from_reg(reg.to_reg().into()), mem: MemArg::reg_plus_off( stack_reg(), (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64, @@ -603,7 +607,7 @@ impl ABIMachineSpec for S390xMachineDeps { offset += stack_size as i64; } insts.push(Inst::LoadMultiple64 { - rt: writable_gpr(first_clobbered_gpr as u8), + rt: writable_gpr(first_clobbered_gpr), rt2: writable_gpr(15), mem: MemArg::reg_plus_off(stack_reg(), offset, MemFlags::trusted()), }); @@ -620,55 +624,43 @@ impl ABIMachineSpec for S390xMachineDeps { tmp: Writable, _callee_conv: isa::CallConv, _caller_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { + ) -> SmallVec<[Inst; 2]> { let mut insts = SmallVec::new(); match &dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( - InstIsSafepoint::Yes, - Inst::Call { - link: writable_gpr(14), - info: Box::new(CallInfo { - dest: name.clone(), - uses, - defs, - opcode, - }), - }, - )), + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { + link: writable_gpr(14), + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + opcode, + }), + }), &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(( - InstIsSafepoint::No, - Inst::LoadExtNameFar { - rd: tmp, - name: Box::new(name.clone()), - offset: 0, - }, - )); - insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { - link: writable_gpr(14), - info: Box::new(CallIndInfo { - rn: tmp.to_reg(), - uses, - defs, - opcode, - }), - }, - )); - } - &CallDest::Reg(reg) => insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { + insts.push(Inst::LoadExtNameFar { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::CallInd { link: writable_gpr(14), info: 
Box::new(CallIndInfo { - rn: *reg, + rn: tmp.to_reg(), uses, defs, opcode, }), - }, - )), + }); + } + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + link: writable_gpr(14), + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + opcode, + }), + }), } insts @@ -686,9 +678,8 @@ impl ABIMachineSpec for S390xMachineDeps { fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. match rc { - RegClass::I64 => 1, - RegClass::F64 => 1, - _ => panic!("Unexpected register class!"), + RegClass::Int => 1, + RegClass::Float => 1, } } @@ -706,13 +697,13 @@ impl ABIMachineSpec for S390xMachineDeps { let mut caller_saved = Vec::new(); for i in 0..15 { let x = writable_gpr(i); - if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg().unwrap()) { caller_saved.push(x); } } for i in 0..15 { let v = writable_fpr(i); - if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg().unwrap()) { caller_saved.push(v); } } @@ -728,7 +719,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, - regs: &Set>, + regs: &[Writable], ) -> Vec> { let mut regs: Vec> = regs .iter() @@ -738,7 +729,7 @@ impl ABIMachineSpec for S390xMachineDeps { // Sort registers for deterministic code output. We can do an unstable // sort because the registers will be unique (there are no dups). - regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs.sort_unstable_by_key(|r| PReg::from(r.to_reg()).index()); regs } @@ -754,50 +745,47 @@ impl ABIMachineSpec for S390xMachineDeps { } fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool { - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // r6 - r15 inclusive are callee-saves. - r.get_hw_encoding() >= 6 && r.get_hw_encoding() <= 15 + r.hw_enc() >= 6 && r.hw_enc() <= 15 } - RegClass::F64 => { + RegClass::Float => { // f8 - f15 inclusive are callee-saves. - r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15 + r.hw_enc() >= 8 && r.hw_enc() <= 15 } - _ => panic!("Unexpected RegClass"), } } fn get_regs_saved_in_prologue( call_conv: isa::CallConv, - regs: &Set>, + regs: &[Writable], ) -> (Vec>, Vec>) { let mut int_saves = vec![]; let mut fpr_saves = vec![]; - for ® in regs.iter() { + for ® in regs { if is_reg_saved_in_prologue(call_conv, reg.to_reg()) { - match reg.to_reg().get_class() { - RegClass::I64 => int_saves.push(reg), - RegClass::F64 => fpr_saves.push(reg), - _ => panic!("Unexpected RegClass"), + match reg.to_reg().class() { + RegClass::Int => int_saves.push(reg), + RegClass::Float => fpr_saves.push(reg), } } } // Sort registers for deterministic code output. - int_saves.sort_by_key(|r| r.to_reg().get_index()); - fpr_saves.sort_by_key(|r| r.to_reg().get_index()); + int_saves.sort_by_key(|r| VReg::from(r.to_reg()).vreg()); + fpr_saves.sort_by_key(|r| VReg::from(r.to_reg()).vreg()); (int_saves, fpr_saves) } fn is_reg_clobbered_by_call(_call_conv: isa::CallConv, r: RealReg) -> bool { - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // r0 - r5 inclusive are caller-saves. - r.get_hw_encoding() <= 5 + r.hw_enc() <= 5 } - RegClass::F64 => { + RegClass::Float => { // f0 - f7 inclusive are caller-saves. 
diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle
index 09a41a4fe7..6c127fbc14 100644
--- a/cranelift/codegen/src/isa/s390x/inst.isle
+++ b/cranelift/codegen/src/isa/s390x/inst.isle
@@ -601,7 +601,8 @@
   ;; A machine return instruction.
   (Ret
-    (link Reg))
+    (link Reg)
+    (rets VecReg))

   ;; A placeholder instruction, generating no code, meaning that a function epilogue must be
   ;; inserted there.
@@ -696,11 +697,10 @@
   (VirtualSPOffsetAdj
     (offset i64))

-  ;; A definition of a value label.
-  (ValueLabelMarker
-    (reg Reg)
-    (label ValueLabel))
-
+  ;; Pseudoinstruction to keep a value alive.
+  (DummyUse
+    (reg Reg))
+
   ;; An unwind pseudoinstruction describing the state of the
   ;; machine at this program point.
   (Unwind
diff --git a/cranelift/codegen/src/isa/s390x/inst/args.rs b/cranelift/codegen/src/isa/s390x/inst/args.rs
index 73f96c61a9..1f8e8ca623 100644
--- a/cranelift/codegen/src/isa/s390x/inst/args.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/args.rs
@@ -7,8 +7,7 @@ use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::MemFlags;
 use crate::isa::s390x::inst::*;
 use crate::machinst::MachLabel;
-
-use regalloc::{PrettyPrint, RealRegUniverse, Reg};
+use crate::machinst::{PrettyPrint, Reg};

 use std::string::String;

@@ -113,6 +112,40 @@ impl MemArg {
     pub(crate) fn can_trap(&self) -> bool {
         !self.get_flags().notrap()
     }
+
+    /// Edit registers with allocations.
+    pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
+        match self {
+            &MemArg::BXD12 {
+                base,
+                index,
+                disp,
+                flags,
+            } => MemArg::BXD12 {
+                base: allocs.next(base),
+                index: allocs.next(index),
+                disp,
+                flags,
+            },
+            &MemArg::BXD20 {
+                base,
+                index,
+                disp,
+                flags,
+            } => MemArg::BXD20 {
+                base: allocs.next(base),
+                index: allocs.next(index),
+                disp,
+                flags,
+            },
+            &MemArg::RegOffset { reg, off, flags } => MemArg::RegOffset {
+                reg: allocs.next(reg),
+                off,
+                flags,
+            },
+            x => x.clone(),
+        }
+    }
 }

 //=============================================================================
@@ -183,49 +216,53 @@ impl Cond {
 }

 impl PrettyPrint for MemArg {
-    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, allocs: &mut AllocationConsumer<'_>) -> String {
         match self {
             &MemArg::BXD12 {
                 base, index, disp, ..
             } => {
+                let base = allocs.next(base);
+                let index = allocs.next(index);
                 if base != zero_reg() {
                     if index != zero_reg() {
                         format!(
                             "{}({},{})",
-                            disp.show_rru(mb_rru),
-                            index.show_rru(mb_rru),
-                            base.show_rru(mb_rru)
+                            disp.pretty_print_default(),
+                            show_reg(index),
+                            show_reg(base),
                         )
                     } else {
-                        format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
+                        format!("{}({})", disp.pretty_print_default(), show_reg(base))
                     }
                 } else {
                     if index != zero_reg() {
-                        format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
+                        format!("{}({},)", disp.pretty_print_default(), show_reg(index))
                     } else {
-                        format!("{}", disp.show_rru(mb_rru))
+                        format!("{}", disp.pretty_print_default())
                     }
                 }
             }
             &MemArg::BXD20 {
                 base, index, disp, ..
             } => {
+                let base = allocs.next(base);
+                let index = allocs.next(index);
                 if base != zero_reg() {
                     if index != zero_reg() {
                         format!(
                             "{}({},{})",
-                            disp.show_rru(mb_rru),
-                            index.show_rru(mb_rru),
-                            base.show_rru(mb_rru)
+                            disp.pretty_print_default(),
+                            show_reg(index),
+                            show_reg(base),
                         )
                     } else {
-                        format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
+                        format!("{}({})", disp.pretty_print_default(), show_reg(base))
                     }
                 } else {
                     if index != zero_reg() {
-                        format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
+                        format!("{}({},)", disp.pretty_print_default(), show_reg(index))
                     } else {
-                        format!("{}", disp.show_rru(mb_rru))
+                        format!("{}", disp.pretty_print_default())
                    }
                 }
             }
@@ -244,7 +281,7 @@ impl PrettyPrint for MemArg {
 }

 impl PrettyPrint for Cond {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         let s = match self.mask {
             1 => "o",
             2 => "h",
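The new `with_allocs` helper rewrites a `MemArg`'s registers by pulling the next entries off an allocation cursor. A rough sketch of that cursor pattern, under the assumption that an empty allocation slice means "operands are already physical"; every type below is invented for illustration, not the real cranelift API:

/// Illustrative stand-in for regalloc2's output: one final register per
/// operand, in exactly the order the operands were collected.
struct AllocationConsumer<'a> {
    allocs: &'a [u8], // hardware register numbers, for the sketch
    next: usize,
}

impl<'a> AllocationConsumer<'a> {
    fn new(allocs: &'a [u8]) -> Self {
        Self { allocs, next: 0 }
    }
    /// Map a pre-regalloc operand to its allocation; with an empty slice
    /// (already-allocated instructions), return the operand unchanged.
    fn next_reg(&mut self, vreg: u8) -> u8 {
        if self.allocs.is_empty() {
            return vreg;
        }
        let r = self.allocs[self.next];
        self.next += 1;
        r
    }
}

fn main() {
    // An "instruction" with operands (v0, v1) allocated to (r2, r3).
    let mut allocs = AllocationConsumer::new(&[2, 3]);
    assert_eq!(allocs.next_reg(0), 2);
    assert_eq!(allocs.next_reg(1), 3);
    println!("ok");
}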
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs
index 46067ed4b9..9a3dc3a76d 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs
@@ -5,8 +5,10 @@ use crate::ir::MemFlags;
 use crate::ir::{SourceLoc, TrapCode};
 use crate::isa::s390x::inst::*;
 use crate::isa::s390x::settings as s390x_settings;
+use crate::machinst::reg::count_operands;
+use crate::machinst::{Reg, RegClass};
 use core::convert::TryFrom;
-use regalloc::{Reg, RegClass};
+use regalloc2::Allocation;

 /// Memory addressing mode finalization: convert "special" modes (e.g.,
 /// generic arbitrary stack offset) into real addressing modes, possibly by
@@ -125,7 +127,7 @@ pub fn mem_emit(
         true,
     );
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -195,7 +197,7 @@ pub fn mem_rs_emit(
         false,
     );
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -237,7 +239,7 @@ pub fn mem_imm8_emit(
 ) {
     let (mem_insts, mem) = mem_finalize(mem, state, true, true, false, false);
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -275,7 +277,7 @@ pub fn mem_imm16_emit(
 ) {
     let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, false);
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -300,17 +302,17 @@ pub fn mem_imm16_emit(
 // Instructions and subcomponents: emission

 fn machreg_to_gpr(m: Reg) -> u8 {
-    assert_eq!(m.get_class(), RegClass::I64);
-    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+    assert_eq!(m.class(), RegClass::Int);
+    u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
 }

 fn machreg_to_fpr(m: Reg) -> u8 {
-    assert_eq!(m.get_class(), RegClass::F64);
-    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+    assert_eq!(m.class(), RegClass::Float);
+    u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
 }

 fn machreg_to_gpr_or_fpr(m: Reg) -> u8 {
-    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+    u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
 }

 /// E-type instructions.
@@ -936,7 +938,15 @@ impl MachInstEmit for Inst {
     type State = EmitState;
     type Info = EmitInfo;

-    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+    fn emit(
+        &self,
+        allocs: &[Allocation],
+        sink: &mut MachBuffer<Inst>,
+        emit_info: &Self::Info,
+        state: &mut EmitState,
+    ) {
+        let mut allocs = AllocationConsumer::new(allocs);
+
         // Verify that we can emit this Inst in the current ISA
         let matches_isa_flags = |iset_requirement: &InstructionSet| -> bool {
             match iset_requirement {
@@ -965,6 +975,10 @@ impl MachInstEmit for Inst {

         match self {
             &Inst::AluRRR { alu_op, rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let (opcode, have_rr) = match alu_op {
                     ALUOp::Add32 => (0xb9f8, true), // ARK
                     ALUOp::Add64 => (0xb9e8, true), // AGRK
@@ -992,7 +1006,7 @@ impl MachInstEmit for Inst {
                 };
                 if have_rr && rd.to_reg() == rn {
                     let inst = Inst::AluRR { alu_op, rd, rm };
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 } else {
                     put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0));
                 }
@@ -1003,9 +1017,12 @@ impl MachInstEmit for Inst {
                 rn,
                 imm,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 if rd.to_reg() == rn {
                     let inst = Inst::AluRSImm16 { alu_op, rd, imm };
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 } else {
                     let opcode = match alu_op {
                         ALUOp::Add32 => 0xecd8, // AHIK
@@ -1016,6 +1033,9 @@ impl MachInstEmit for Inst {
                 }
             }
             &Inst::AluRR { alu_op, rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let (opcode, is_rre) = match alu_op {
                     ALUOp::Add32 => (0x1a, false),  // AR
                     ALUOp::Add64 => (0xb908, true), // AGR
@@ -1051,6 +1071,9 @@ impl MachInstEmit for Inst {
                 rd,
                 ref mem,
             } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy) = match alu_op {
                     ALUOp::Add32 => (Some(0x5a), Some(0xe35a)),      // A(Y)
                     ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y)
@@ -1083,10 +1106,12 @@ impl MachInstEmit for Inst {
                 };
                 let rd = rd.to_reg();
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
                 );
             }
             &Inst::AluRSImm16 { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match alu_op {
                     ALUOp::Add32 => 0xa7a, // AHI
                     ALUOp::Add64 => 0xa7b, // AGHI
@@ -1097,6 +1122,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16));
             }
             &Inst::AluRSImm32 { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match alu_op {
                     ALUOp::Add32 => 0xc29, // AFI
                     ALUOp::Add64 => 0xc28, // AGFI
@@ -1107,6 +1134,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32));
             }
             &Inst::AluRUImm32 { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match alu_op {
                     ALUOp::AddLogical32 => 0xc2b, // ALFI
                     ALUOp::AddLogical64 => 0xc2a, // ALGFI
@@ -1117,6 +1146,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm));
             }
             &Inst::AluRUImm16Shifted { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match (alu_op, imm.shift) {
                     (ALUOp::And32, 0) => 0xa57, // NILL
                     (ALUOp::And32, 1) => 0xa56, // NILH
@@ -1135,6 +1166,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::AluRUImm32Shifted { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match (alu_op, imm.shift) {
                     (ALUOp::And32, 0) => 0xc0b, // NILF
                     (ALUOp::And64, 0) => 0xc0b, // NILF
@@ -1151,38 +1184,53 @@ impl MachInstEmit for Inst {
             }

             &Inst::SMulWide { rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb9ec; // MGRK
                 put(sink, &enc_rrf_ab(opcode, gpr(0), rn, rm, 0));
             }
             &Inst::UMulWide { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb986; // MLGR
                 put(sink, &enc_rre(opcode, gpr(0), rn));
             }
             &Inst::SDivMod32 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb91d; // DSGFR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::SDivMod64 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb90d; // DSGR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::UDivMod32 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb997; // DLR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::UDivMod64 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb987; // DLGR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::Flogr { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb983; // FLOGR
                 put(sink, &enc_rre(opcode, gpr(0), rn));
             }
@@ -1194,6 +1242,10 @@ impl MachInstEmit for Inst {
                 shift_imm,
                 shift_reg,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let shift_reg = allocs.next(shift_reg);
+
                 let opcode = match shift_op {
                     ShiftOp::RotL32 => 0xeb1d, // RLL
                     ShiftOp::RotL64 => 0xeb1c, // RLLG
@@ -1218,6 +1270,9 @@ impl MachInstEmit for Inst {
                 end_bit,
                 rotate_amt,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     RxSBGOp::Insert => 0xec59, // RISBGN
                     RxSBGOp::And => 0xec54,    // RNSBG
@@ -1245,6 +1300,9 @@ impl MachInstEmit for Inst {
                 end_bit,
                 rotate_amt,
             } => {
+                let rd = allocs.next(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     RxSBGOp::And => 0xec54, // RNSBG
                     RxSBGOp::Or => 0xec56,  // ROSBG
@@ -1265,6 +1323,9 @@ impl MachInstEmit for Inst {
             }

             &Inst::UnaryRR { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 match op {
                     UnaryOp::Abs32 => {
                         let opcode = 0x10; // LPR
@@ -1316,6 +1377,9 @@ impl MachInstEmit for Inst {
                 from_bits,
                 to_bits,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match (signed, from_bits, to_bits) {
                     (_, 1, 32) => 0xb926, // LBR
                     (_, 1, 64) => 0xb906, // LGBR
@@ -1338,6 +1402,9 @@ impl MachInstEmit for Inst {
             }

             &Inst::CmpRR { op, rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let (opcode, is_rre) = match op {
                     CmpOp::CmpS32 => (0x19, false),  // CR
                     CmpOp::CmpS64 => (0xb920, true), // CGR
@@ -1354,6 +1421,9 @@ impl MachInstEmit for Inst {
                 }
             }
             &Inst::CmpRX { op, rn, ref mem } => {
+                let rn = allocs.next(rn);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy, opcode_ril) = match op {
                     CmpOp::CmpS32 => (Some(0x59), Some(0xe359), Some(0xc6d)), // C(Y), CRL
                     CmpOp::CmpS32Ext16 => (Some(0x49), Some(0xe379), Some(0xc65)), // CH(Y), CHRL
@@ -1367,10 +1437,12 @@ impl MachInstEmit for Inst {
                     CmpOp::CmpL64Ext32 => (None, Some(0xe331), Some(0xc6e)), // CLGF, CLGFRL
                 };
                 mem_emit(
-                    rn, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
+                    rn, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
                 );
             }
             &Inst::CmpRSImm16 { op, rn, imm } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xa7e, // CHI
                     CmpOp::CmpS64 => 0xa7f, // CGHI
@@ -1379,6 +1451,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rn, imm as u16));
             }
             &Inst::CmpRSImm32 { op, rn, imm } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xc2d, // CFI
                     CmpOp::CmpS64 => 0xc2c, // CGFI
@@ -1387,6 +1461,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rn, imm as u32));
             }
             &Inst::CmpRUImm32 { op, rn, imm } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpL32 => 0xc2f, // CLFI
                     CmpOp::CmpL64 => 0xc2e, // CLGFI
@@ -1401,6 +1477,9 @@ impl MachInstEmit for Inst {
                 cond,
                 trap_code,
             } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xb972, // CRT
                     CmpOp::CmpS64 => 0xb960, // CGRT
@@ -1423,6 +1502,8 @@ impl MachInstEmit for Inst {
                 cond,
                 trap_code,
             } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xec72, // CIT
                     CmpOp::CmpS64 => 0xec70, // CGIT
@@ -1443,6 +1524,8 @@ impl MachInstEmit for Inst {
                 cond,
                 trap_code,
             } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpL32 => 0xec73, // CLFIT
                     CmpOp::CmpL64 => 0xec71, // CLGIT
@@ -1463,6 +1546,10 @@ impl MachInstEmit for Inst {
                 rn,
                 ref mem,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match alu_op {
                     ALUOp::Add32 => 0xebf8, // LAA
                     ALUOp::Add64 => 0xebe8, // LAAG
@@ -1481,7 +1568,7 @@ impl MachInstEmit for Inst {
                 mem_rs_emit(
                     rd,
                     rn,
-                    mem,
+                    &mem,
                     None,
                     Some(opcode),
                     true,
@@ -1500,6 +1587,8 @@ impl MachInstEmit for Inst {

                 sink.bind_label(loop_label);
                 for inst in (&body).into_iter() {
+                    let op_count = count_operands(inst);
+                    let sub_allocs = allocs.next_n(op_count);
                     match &inst {
                         // Replace a CondBreak with a branch to done_label.
                         &Inst::CondBreak { cond } => {
@@ -1507,9 +1596,9 @@ impl MachInstEmit for Inst {
                                 target: done_label,
                                 cond: *cond,
                             };
-                            inst.emit(sink, emit_info, state);
+                            inst.emit(&sub_allocs[..], sink, emit_info, state);
                         }
-                        _ => inst.emit(sink, emit_info, state),
+                        _ => inst.emit(&sub_allocs[..], sink, emit_info, state),
                     };
                 }

@@ -1517,13 +1606,17 @@ impl MachInstEmit for Inst {
                     target: loop_label,
                     cond,
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Emit label at the end of the loop.
                 sink.bind_label(done_label);
             }
             &Inst::CondBreak { .. } => unreachable!(), // Only valid inside a Loop.
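The `Loop` pseudo-instruction above hands each nested instruction its own slice of the flat allocation array, sized by `count_operands`. A small sketch of that slicing step, with invented stand-in names (`split_allocs` is not a cranelift function):

/// Illustrative: hand out one slice of a flat allocation array per nested
/// instruction, sized by that instruction's operand count (cf. the
/// `count_operands` / `allocs.next_n(op_count)` pairing above).
fn split_allocs<'a>(mut allocs: &'a [u8], op_counts: &[usize]) -> Vec<&'a [u8]> {
    let mut out = Vec::new();
    for &n in op_counts {
        let (head, tail) = allocs.split_at(n);
        out.push(head);
        allocs = tail;
    }
    out
}

fn main() {
    // A loop body of three instructions with 3, 1, and 2 operands.
    let allocs = [7, 8, 9, 2, 4, 5];
    let per_inst = split_allocs(&allocs, &[3, 1, 2]);
    assert_eq!(per_inst, vec![&[7, 8, 9][..], &[2][..], &[4, 5][..]]);
    println!("ok");
}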
             &Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rs, opcode_rsy) = match self {
                     &Inst::AtomicCas32 { .. } => (Some(0xba), Some(0xeb14)), // CS(Y)
                     &Inst::AtomicCas64 { .. } => (None, Some(0xeb30)),       // CSG
@@ -1532,7 +1625,7 @@ impl MachInstEmit for Inst {
                 let rd = rd.to_reg();
                 mem_rs_emit(
-                    rd, rn, mem, opcode_rs, opcode_rsy, true, sink, emit_info, state,
+                    rd, rn, &mem, opcode_rs, opcode_rsy, true, sink, emit_info, state,
                 );
             }
             &Inst::Fence => {
@@ -1556,6 +1649,9 @@ impl MachInstEmit for Inst {
             | &Inst::LoadRev64 { rd, ref mem }
             | &Inst::FpuLoad32 { rd, ref mem }
             | &Inst::FpuLoad64 { rd, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy, opcode_ril) = match self {
                     &Inst::Load32 { .. } => (Some(0x58), Some(0xe358), Some(0xc4d)), // L(Y), LRL
                     &Inst::Load32ZExt8 { .. } => (None, Some(0xe394), None),         // LLC
@@ -1578,19 +1674,22 @@ impl MachInstEmit for Inst {
                 };
                 let rd = rd.to_reg();
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
                 );
             }

             &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match self {
                     &Inst::FpuLoadRev32 { .. } => 0xe603, // VLEBRF
                     &Inst::FpuLoadRev64 { .. } => 0xe602, // VLEBRG
                     _ => unreachable!(),
                 };
-                let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true);
+                let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
                 for inst in mem_insts.into_iter() {
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 }

                 let srcloc = state.cur_srcloc();
@@ -1620,6 +1719,9 @@ impl MachInstEmit for Inst {
             | &Inst::StoreRev64 { rd, ref mem }
             | &Inst::FpuStore32 { rd, ref mem }
             | &Inst::FpuStore64 { rd, ref mem } => {
+                let rd = allocs.next(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy, opcode_ril) = match self {
                     &Inst::Store8 { .. } => (Some(0x42), Some(0xe372), None), // STC(Y)
                     &Inst::Store16 { .. } => (Some(0x40), Some(0xe370), Some(0xc47)), // STH(Y), STHRL
@@ -1633,37 +1735,44 @@ impl MachInstEmit for Inst {
                     _ => unreachable!(),
                 };
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
                 );
             }
             &Inst::StoreImm8 { imm, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode_si = 0x92;    // MVI
                 let opcode_siy = 0xeb52; // MVIY
                 mem_imm8_emit(
-                    imm, mem, opcode_si, opcode_siy, true, sink, emit_info, state,
+                    imm, &mem, opcode_si, opcode_siy, true, sink, emit_info, state,
                 );
             }
             &Inst::StoreImm16 { imm, ref mem }
             | &Inst::StoreImm32SExt16 { imm, ref mem }
             | &Inst::StoreImm64SExt16 { imm, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match self {
                     &Inst::StoreImm16 { .. } => 0xe544,       // MVHHI
                     &Inst::StoreImm32SExt16 { .. } => 0xe54c, // MVHI
                     &Inst::StoreImm64SExt16 { .. } => 0xe548, // MVGHI
                     _ => unreachable!(),
                 };
-                mem_imm16_emit(imm, mem, opcode, true, sink, emit_info, state);
+                mem_imm16_emit(imm, &mem, opcode, true, sink, emit_info, state);
             }

             &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => {
+                let rd = allocs.next(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match self {
                     &Inst::FpuStoreRev32 { .. } => 0xe60b, // VSTEBRF
                     &Inst::FpuStoreRev64 { .. } => 0xe60a, // VSTEBRG
                     _ => unreachable!(),
                 };
-                let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true);
+                let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
                 for inst in mem_insts.into_iter() {
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 }

                 let srcloc = state.cur_srcloc();
@@ -1682,6 +1791,8 @@ impl MachInstEmit for Inst {
             }

             &Inst::LoadMultiple64 { rt, rt2, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = 0xeb04; // LMG
                 let rt = rt.to_reg();
                 let rt2 = rt2.to_reg();
@@ -1698,6 +1809,8 @@ impl MachInstEmit for Inst {
                 );
             }
             &Inst::StoreMultiple64 { rt, rt2, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = 0xeb24; // STMG
                 mem_rs_emit(
                     rt,
@@ -1713,48 +1826,73 @@ impl MachInstEmit for Inst {
             }

             &Inst::LoadAddr { rd, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode_rx = Some(0x41);    // LA
                 let opcode_rxy = Some(0xe371); // LAY
                 let opcode_ril = Some(0xc00);  // LARL
                 let rd = rd.to_reg();
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state,
                 );
             }

             &Inst::Mov64 { rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb904; // LGR
                 put(sink, &enc_rre(opcode, rd.to_reg(), rm));
             }
             &Inst::Mov32 { rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0x18; // LR
                 put(sink, &enc_rr(opcode, rd.to_reg(), rm));
             }
             &Inst::Mov32Imm { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xc09; // IILF
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm));
             }
             &Inst::Mov32SImm16 { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa78; // LHI
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16));
             }
             &Inst::Mov64SImm16 { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa79; // LGHI
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16));
             }
             &Inst::Mov64SImm32 { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xc01; // LGFI
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32));
             }
             &Inst::CMov32 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb9f2; // LOCR
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0));
             }
             &Inst::CMov64 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb9e2; // LOCGR
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0));
             }
             &Inst::CMov32SImm16 { rd, cond, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xec42; // LOCHI
                 put(
                     sink,
@@ -1762,6 +1900,8 @@ impl MachInstEmit for Inst {
                 );
             }
             &Inst::CMov64SImm16 { rd, cond, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xec46; // LOCGHI
                 put(
                     sink,
@@ -1769,6 +1909,8 @@ impl MachInstEmit for Inst {
                 );
             }
             &Inst::Mov64UImm16Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xa5f, // LLILL
                     1 => 0xa5e, // LLILH
@@ -1779,6 +1921,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::Mov64UImm32Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xc0f, // LLILF
                     1 => 0xc0e, // LLIHF
@@ -1787,6 +1931,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::Insert64UImm16Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xa53, // IILL
                     1 => 0xa52, // IILH
@@ -1797,6 +1943,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::Insert64UImm32Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xc09, // IILF
                     1 => 0xc08, // IIHF
@@ -1809,6 +1957,8 @@ impl MachInstEmit for Inst {
                 ref name,
                 offset,
             } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa75; // BRAS
                 let srcloc = state.cur_srcloc();
                 let reg = writable_spilltmp_reg().to_reg();
@@ -1823,38 +1973,58 @@ impl MachInstEmit for Inst {
                     rd,
                     mem: MemArg::reg(reg, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);
             }

             &Inst::FpuMove32 { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0x38; // LER
                 put(sink, &enc_rr(opcode, rd.to_reg(), rn));
             }
             &Inst::FpuMove64 { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0x28; // LDR
                 put(sink, &enc_rr(opcode, rd.to_reg(), rn));
             }
             &Inst::FpuCMov32 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xa74; // BCR
                 put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
                 let opcode = 0x38; // LER
                 put(sink, &enc_rr(opcode, rd.to_reg(), rm));
             }
             &Inst::FpuCMov64 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xa74; // BCR
                 put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
                 let opcode = 0x28; // LDR
                 put(sink, &enc_rr(opcode, rd.to_reg(), rm));
             }
             &Inst::MovToFpr { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb3c1; // LDGR
                 put(sink, &enc_rre(opcode, rd.to_reg(), rn));
             }
             &Inst::MovFromFpr { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb3cd; // LGDR
                 put(sink, &enc_rre(opcode, rd.to_reg(), rn));
             }
             &Inst::LoadFpuConst32 { rd, const_data } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa75; // BRAS
                 let reg = writable_spilltmp_reg().to_reg();
                 put(sink, &enc_ri_b(opcode, reg, 8));
@@ -1863,9 +2033,11 @@ impl MachInstEmit for Inst {
                     rd,
                     mem: MemArg::reg(reg, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);
             }
             &Inst::LoadFpuConst64 { rd, const_data } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa75; // BRAS
                 let reg = writable_spilltmp_reg().to_reg();
                 put(sink, &enc_ri_b(opcode, reg, 12));
@@ -1874,14 +2046,21 @@ impl MachInstEmit for Inst {
                     rd,
                     mem: MemArg::reg(reg, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);
             }

             &Inst::FpuCopysign { rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb372; // CPSDR
                 put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0));
             }
             &Inst::FpuRR { fpu_op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match fpu_op {
                     FPUOp1::Abs32 => 0xb300, // LPEBR
                     FPUOp1::Abs64 => 0xb310, // LPDBR
@@ -1897,6 +2076,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rre(opcode, rd.to_reg(), rn));
             }
             &Inst::FpuRRR { fpu_op, rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = match fpu_op {
                     FPUOp2::Add32 => 0xb30a, // AEBR
                     FPUOp2::Add64 => 0xb31a, // ADBR
@@ -1911,6 +2093,10 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rre(opcode, rd.to_reg(), rm));
             }
             &Inst::FpuRRRR { fpu_op, rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = match fpu_op {
                     FPUOp3::MAdd32 => 0xb30e, // MAEBR
                     FPUOp3::MAdd64 => 0xb31e, // MADBR
@@ -1920,6 +2106,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrd(opcode, rd.to_reg(), rm, rn));
             }
             &Inst::FpuToInt { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     FpuToIntOp::F32ToI32 => 0xb398, // CFEBRA
                     FpuToIntOp::F32ToU32 => 0xb39c, // CLFEBR
@@ -1933,6 +2122,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 5, 0));
             }
             &Inst::IntToFpu { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     IntToFpuOp::I32ToF32 => 0xb394, // CEFBRA
                     IntToFpuOp::U32ToF32 => 0xb390, // CELFBR
@@ -1946,6 +2138,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0));
             }
             &Inst::FpuRound { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let (opcode, m3) = match op {
                     FpuRoundMode::Minus32 => (0xb357, 7), // FIEBR
                     FpuRoundMode::Minus64 => (0xb35f, 7), // FIDBR
@@ -1959,6 +2154,10 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, m3, 0));
             }
             &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let (opcode, m4) = match fpu_op {
                     FPUOp2::Max32 => (0xe7ef, 2), // VFMAX
                     FPUOp2::Max64 => (0xe7ef, 3), // VFMAX
@@ -1969,15 +2168,23 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_vrr(opcode, rd.to_reg(), rn, rm, m4, 8, 1));
             }
             &Inst::FpuCmp32 { rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb309; // CEBR
                 put(sink, &enc_rre(opcode, rn, rm));
             }
             &Inst::FpuCmp64 { rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb319; // CDBR
                 put(sink, &enc_rre(opcode, rn, rm));
             }

             &Inst::Call { link, ref info } => {
+                let link = allocs.next_writable(link);
+
                 let opcode = 0xc05; // BRASL
                 let reloc = Reloc::S390xPCRel32Dbl;
                 let srcloc = state.cur_srcloc();
@@ -1998,17 +2205,22 @@ impl MachInstEmit for Inst {
                 }
             }
             &Inst::CallInd { link, ref info } => {
+                let link = allocs.next_writable(link);
+                let rn = allocs.next(info.rn);
+
                 let opcode = 0x0d; // BASR
                 let srcloc = state.cur_srcloc();
                 if let Some(s) = state.take_stack_map() {
                     sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
                 }
-                put(sink, &enc_rr(opcode, link.to_reg(), info.rn));
+                put(sink, &enc_rr(opcode, link.to_reg(), rn));
                 if info.opcode.is_call() {
                     sink.add_call_site(srcloc, info.opcode);
                 }
             }
-            &Inst::Ret { link } => {
+            &Inst::Ret { link, .. } => {
+                let link = allocs.next(link);
+
                 let opcode = 0x07; // BCR
                 put(sink, &enc_rr(opcode, gpr(15), link));
             }
@@ -2025,6 +2237,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_c(opcode, 15, 0));
             }
             &Inst::IndirectBr { rn, .. } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0x07; // BCR
                 put(sink, &enc_rr(opcode, gpr(15), rn));
             }
@@ -2079,6 +2293,8 @@ impl MachInstEmit for Inst {
                 put_with_trap(sink, &enc_e(0x0000), srcloc, trap_code);
             }
             &Inst::JTSequence { ridx, ref targets } => {
+                let ridx = allocs.next(ridx);
+
                 let table_label = sink.get_label();

                 // This sequence is *one* instruction in the vcode, and is expanded only here at
@@ -2093,7 +2309,7 @@ impl MachInstEmit for Inst {
                         target: table_label,
                     },
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Set temp to target address by adding the value of the jump table entry.
                 let inst = Inst::AluRX {
@@ -2101,7 +2317,7 @@ impl MachInstEmit for Inst {
                     rd: rtmp,
                     mem: MemArg::reg_plus_reg(rtmp.to_reg(), ridx, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Branch to computed address. (`targets` here is only used for successor queries
                 // and is not needed for emission.)
@@ -2109,7 +2325,7 @@ impl MachInstEmit for Inst {
                     rn: rtmp.to_reg(),
                     targets: vec![],
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Emit jump table (table of 32-bit offsets).
                 // The first entry is the default target, which is not emitted
@@ -2138,13 +2354,11 @@ impl MachInstEmit for Inst {
                 state.virtual_sp_offset += offset;
             }

-            &Inst::ValueLabelMarker { .. } => {
-                // Nothing; this is only used to compute debug info.
-            }
-
             &Inst::Unwind { ref inst } => {
                 sink.add_unwind(inst.clone());
             }
+
+            &Inst::DummyUse { .. } => {}
         }

         let end_off = sink.cur_offset();
@@ -2153,7 +2367,8 @@ impl MachInstEmit for Inst {
         state.clear_post_insn();
     }

-    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
-        self.print_with_state(mb_rru, state)
+    fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut EmitState) -> String {
+        let mut allocs = AllocationConsumer::new(allocs);
+        self.print_with_state(state, &mut allocs)
     }
 }
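The `emit`/`pretty_print_inst` signatures above rely on one invariant: allocations arrive in exactly the order the operand collector reported operands. A toy illustration of the two passes agreeing on order; both sides are stand-ins, not the real cranelift/regalloc2 types:

/// Illustrative collection pass: record operand names in visit order.
struct Collector(Vec<&'static str>);
impl Collector {
    fn reg_def(&mut self, name: &'static str) { self.0.push(name); }
    fn reg_use(&mut self, name: &'static str) { self.0.push(name); }
}

fn main() {
    // Collection pass: visit operands of a hypothetical `add rd, rn, rm`.
    let mut c = Collector(Vec::new());
    c.reg_def("rd");
    c.reg_use("rn");
    c.reg_use("rm");

    // Emission pass: pull allocations in the same order.
    let allocs = ["r2", "r3", "r4"]; // one final register per collected operand
    assert_eq!(c.0.len(), allocs.len());
    for (operand, alloc) in c.0.iter().zip(allocs.iter()) {
        println!("{} -> {}", operand, alloc);
    }
}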
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
index ed2310e288..225ea132dd 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
@@ -1585,7 +1585,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61D00000003",
@@ -1624,7 +1624,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS32Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61500000003",
@@ -1649,7 +1649,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS64,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61800000003",
@@ -1674,7 +1674,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS64Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61400000003",
@@ -1699,7 +1699,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS64Ext32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61C00000003",
@@ -1738,7 +1738,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61F00000003",
@@ -1749,7 +1749,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL32Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61700000003",
@@ -1774,7 +1774,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL64,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61A00000003",
@@ -1785,7 +1785,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL64Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61600000003",
@@ -1810,7 +1810,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL64Ext32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61E00000003",
@@ -4536,7 +4536,7 @@ fn test_s390x_binemit() {
         Inst::Load32 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41D00000003",
@@ -4546,7 +4546,7 @@ fn test_s390x_binemit() {
         Inst::Load32SExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41500000003",
@@ -4556,7 +4556,7 @@ fn test_s390x_binemit() {
         Inst::Load32ZExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41200000003",
@@ -4566,7 +4566,7 @@ fn test_s390x_binemit() {
         Inst::Load64 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41800000003",
@@ -4576,7 +4576,7 @@ fn test_s390x_binemit() {
         Inst::Load64SExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41400000003",
@@ -4586,7 +4586,7 @@ fn test_s390x_binemit() {
         Inst::Load64ZExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41600000003",
@@ -4596,7 +4596,7 @@ fn test_s390x_binemit() {
         Inst::Load64SExt32 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41C00000003",
@@ -4606,7 +4606,7 @@ fn test_s390x_binemit() {
         Inst::Load64ZExt32 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41E00000003",
@@ -5790,7 +5790,7 @@ fn test_s390x_binemit() {
         Inst::Store16 {
             rd: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41700000003",
@@ -5800,7 +5800,7 @@ fn test_s390x_binemit() {
         Inst::Store32 {
             rd: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41F00000003",
@@ -5810,7 +5810,7 @@ fn test_s390x_binemit() {
         Inst::Store64 {
             rd: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41B00000003",
@@ -6035,7 +6035,7 @@ fn test_s390x_binemit() {
         Inst::LoadAddr {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C01000000003",
@@ -6499,7 +6499,7 @@ fn test_s390x_binemit() {

     insns.push((
         Inst::Jump {
-            dest: MachLabel::from_block(0),
+            dest: MachLabel::from_block(BlockIndex::new(0)),
         },
         "C0F400000000",
         "jg label0",
@@ -6507,7 +6507,7 @@ fn test_s390x_binemit() {
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(1),
         },
         "C01400000000",
@@ -6515,7 +6515,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(2),
         },
         "C02400000000",
@@ -6523,7 +6523,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(3),
         },
         "C03400000000",
@@ -6531,7 +6531,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(4),
         },
         "C04400000000",
@@ -6539,7 +6539,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(5),
         },
         "C05400000000",
@@ -6547,7 +6547,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(6),
         },
         "C06400000000",
@@ -6555,7 +6555,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(7),
         },
         "C07400000000",
@@ -6563,7 +6563,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(8),
         },
         "C08400000000",
@@ -6571,7 +6571,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(9),
         },
         "C09400000000",
@@ -6579,7 +6579,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(10),
         },
         "C0A400000000",
@@ -6587,7 +6587,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(11),
         },
         "C0B400000000",
@@ -6595,7 +6595,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(12),
         },
         "C0C400000000",
@@ -6603,7 +6603,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(13),
         },
         "C0D400000000",
@@ -6611,7 +6611,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(14),
         },
         "C0E400000000",
@@ -6620,8 +6620,8 @@ fn test_s390x_binemit() {

     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(1),
         },
         "C01400000000C0F4FFFFFFFD",
@@ -6629,8 +6629,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(2),
         },
         "C02400000000C0F4FFFFFFFD",
@@ -6638,8 +6638,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(3),
         },
         "C03400000000C0F4FFFFFFFD",
@@ -6647,8 +6647,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(4),
         },
         "C04400000000C0F4FFFFFFFD",
@@ -6656,8 +6656,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(5),
         },
         "C05400000000C0F4FFFFFFFD",
@@ -6665,8 +6665,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(6),
         },
         "C06400000000C0F4FFFFFFFD",
@@ -6674,8 +6674,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(7),
         },
         "C07400000000C0F4FFFFFFFD",
@@ -6683,8 +6683,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(8),
         },
         "C08400000000C0F4FFFFFFFD",
@@ -6692,8 +6692,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(9),
         },
         "C09400000000C0F4FFFFFFFD",
@@ -6701,8 +6701,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(10),
         },
         "C0A400000000C0F4FFFFFFFD",
@@ -6710,8 +6710,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(11),
         },
         "C0B400000000C0F4FFFFFFFD",
@@ -6719,8 +6719,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(12),
         },
         "C0C400000000C0F4FFFFFFFD",
@@ -6728,8 +6728,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(13),
         },
         "C0D400000000C0F4FFFFFFFD",
@@ -6737,8 +6737,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(14),
         },
         "C0E400000000C0F4FFFFFFFD",
@@ -6782,7 +6782,14 @@ fn test_s390x_binemit() {
         "basr %r14, %r1",
     ));

-    insns.push((Inst::Ret { link: gpr(14) }, "07FE", "br %r14"));
+    insns.push((
+        Inst::Ret {
+            link: gpr(14),
+            rets: vec![],
+        },
+        "07FE",
+        "br %r14",
+    ));

     insns.push((Inst::Debugtrap, "0001", "debugtrap"));

@@ -8246,7 +8253,6 @@ fn test_s390x_binemit() {
     isa_flag_builder.enable("arch13").unwrap();
     let isa_flags = s390x_settings::Flags::new(&flags, isa_flag_builder);

-    let rru = create_reg_universe(&flags);
     let emit_info = EmitInfo::new(flags, isa_flags);
     for (insn, expected_encoding, expected_printing) in insns {
         println!(
@@ -8255,7 +8261,8 @@ fn test_s390x_binemit() {
         );

         // Check the printed text is as expected.
-        let actual_printing = insn.show_rru(Some(&rru));
+        let actual_printing =
+            insn.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[]));
         assert_eq!(expected_printing, actual_printing);

         let mut buffer = MachBuffer::new();
@@ -8265,7 +8272,7 @@ fn test_s390x_binemit() {
         buffer.bind_label(label0);

         // Emit the instruction.
-        insn.emit(&mut buffer, &emit_info, &mut Default::default());
+        insn.emit(&[], &mut buffer, &emit_info, &mut Default::default());

         // Label 1 after the instruction.
         let label1 = buffer.get_label();
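The tests now wrap raw block numbers as `MachLabel::from_block(BlockIndex::new(n))`. A sketch of why such a newtype index is worth the ceremony; the `BlockIndex` below is a hypothetical re-implementation for illustration, not cranelift's actual type:

/// Illustrative newtype index: a typed wrapper so a block number cannot be
/// confused with, or silently converted from, a plain u32.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BlockIndex(u32);

impl BlockIndex {
    fn new(i: u32) -> Self { BlockIndex(i) }
    fn index(self) -> u32 { self.0 }
}

fn main() {
    let b = BlockIndex::new(1);
    assert_eq!(b.index(), 1);
    // let b2: BlockIndex = 1; // would not compile: no implicit conversion
    println!("{:?}", b);
}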
diff --git a/cranelift/codegen/src/isa/s390x/inst/imms.rs b/cranelift/codegen/src/isa/s390x/inst/imms.rs
index b1a459ea68..c3922c337b 100644
--- a/cranelift/codegen/src/isa/s390x/inst/imms.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/imms.rs
@@ -1,6 +1,6 @@
 //! S390x ISA definitions: immediate constants.

-use regalloc::{PrettyPrint, RealRegUniverse};
+use crate::machinst::{AllocationConsumer, PrettyPrint};
 use std::string::String;

 /// An unsigned 12-bit immediate.
@@ -207,25 +207,25 @@ impl UImm32Shifted {
 }

 impl PrettyPrint for UImm12 {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.value)
     }
 }

 impl PrettyPrint for SImm20 {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.value)
     }
 }

 impl PrettyPrint for UImm16Shifted {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.bits)
     }
 }

 impl PrettyPrint for UImm32Shifted {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.bits)
     }
 }
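The `PrettyPrint` impls above switch from a register-universe parameter to an operand-size byte plus an allocation cursor. A compilable sketch of that trait shape, with `pretty_print_default` as the convenience for operand-free values; all types here are minimal stand-ins, not the real cranelift definitions:

/// Illustrative stand-in for the allocation cursor.
struct AllocationConsumer<'a> { allocs: &'a [u8], next: usize }

trait PrettyPrint {
    fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String;

    /// Convenience for values that reference no operands.
    fn pretty_print_default(&self) -> String {
        self.pretty_print(8, &mut AllocationConsumer { allocs: &[], next: 0 })
    }
}

struct SImm20 { value: i32 }
impl PrettyPrint for SImm20 {
    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
        format!("{}", self.value)
    }
}

fn main() {
    assert_eq!(SImm20 { value: -42 }.pretty_print_default(), "-42");
}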
diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs
index adff4a4d95..c5f7f72f43 100644
--- a/cranelift/codegen/src/isa/s390x/inst/mod.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs
@@ -4,19 +4,15 @@
 #![allow(dead_code)]

 use crate::binemit::{Addend, CodeOffset, Reloc};
-use crate::ir::{types, ExternalName, Opcode, Type, ValueLabel};
+use crate::ir::{types, ExternalName, Opcode, Type};
 use crate::machinst::*;
 use crate::{settings, CodegenError, CodegenResult};
-
-use regalloc::{PrettyPrint, RegUsageCollector, RegUsageMapper};
-use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
-
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use core::convert::TryFrom;
+use regalloc2::VReg;
 use smallvec::{smallvec, SmallVec};
 use std::string::{String, ToString};
-
 pub mod regs;
 pub use self::regs::*;
 pub mod imms;
@@ -196,7 +192,6 @@ impl Inst {
             | Inst::Loop { .. }
             | Inst::CondBreak { .. }
             | Inst::VirtualSPOffsetAdj { .. }
-            | Inst::ValueLabelMarker { .. }
             | Inst::Unwind { .. } => InstructionSet::Base,

             // These depend on the opcode
@@ -216,13 +211,15 @@ impl Inst {
             | Inst::FpuStoreRev32 { .. }
             | Inst::FpuLoadRev64 { .. }
             | Inst::FpuStoreRev64 { .. } => InstructionSet::VXRS_EXT2,
+
+            Inst::DummyUse { .. } => InstructionSet::Base,
         }
     }

     /// Create a 64-bit move instruction.
     pub fn mov64(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
-        assert!(to_reg.to_reg().get_class() == from_reg.get_class());
-        if from_reg.get_class() == RegClass::I64 {
+        assert!(to_reg.to_reg().class() == from_reg.class());
+        if from_reg.class() == RegClass::Int {
             Inst::Mov64 {
                 rd: to_reg,
                 rm: from_reg,
@@ -237,7 +234,7 @@ impl Inst {

     /// Create a 32-bit move instruction.
     pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
-        if from_reg.get_class() == RegClass::I64 {
+        if from_reg.class() == RegClass::Int {
             Inst::Mov32 {
                 rd: to_reg,
                 rm: from_reg,
@@ -352,140 +349,132 @@ impl Inst {
 //=============================================================================
 // Instructions: get_regs

-fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) {
+fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &MemArg, collector: &mut OperandCollector<'_, F>) {
     match memarg {
         &MemArg::BXD12 { base, index, .. } | &MemArg::BXD20 { base, index, .. } => {
-            if base != zero_reg() {
-                collector.add_use(base);
-            }
-            if index != zero_reg() {
-                collector.add_use(index);
-            }
+            collector.reg_use(base);
+            collector.reg_use(index);
         }
         &MemArg::Label { .. } | &MemArg::Symbol { .. } => {}
         &MemArg::RegOffset { reg, .. } => {
-            collector.add_use(reg);
-        }
-        &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } => {
-            collector.add_use(stack_reg());
+            collector.reg_use(reg);
         }
+        &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } => {}
     }
 }

-fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
     match inst {
         &Inst::AluRRR { rd, rn, rm, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::AluRRSImm16 { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::AluRR { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::AluRX { rd, ref mem, .. } => {
-            collector.add_mod(rd);
-            memarg_regs(mem, collector);
+            collector.reg_mod(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::AluRSImm16 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::AluRSImm32 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::AluRUImm32 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
        }
         &Inst::AluRUImm16Shifted { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::AluRUImm32Shifted { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::SMulWide { rn, rm, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_def(writable_gpr(1));
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_def(writable_gpr(1));
         }
         &Inst::UMulWide { rn, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_mod(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_mod(writable_gpr(1));
         }
         &Inst::SDivMod32 { rn, .. } | &Inst::SDivMod64 { rn, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_mod(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_mod(writable_gpr(1));
         }
         &Inst::UDivMod32 { rn, .. } | &Inst::UDivMod64 { rn, .. } => {
-            collector.add_mod(writable_gpr(0));
-            collector.add_mod(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_mod(writable_gpr(0));
+            collector.reg_mod(writable_gpr(1));
        }
         &Inst::Flogr { rn, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_def(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_def(writable_gpr(1));
         }
         &Inst::ShiftRR {
             rd, rn, shift_reg, ..
         } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            if shift_reg != zero_reg() {
-                collector.add_use(shift_reg);
-            }
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(shift_reg);
         }
         &Inst::RxSBG { rd, rn, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rn);
+            collector.reg_mod(rd);
+            collector.reg_use(rn);
         }
         &Inst::RxSBGTest { rd, rn, .. } => {
-            collector.add_use(rd);
-            collector.add_use(rn);
+            collector.reg_use(rd);
+            collector.reg_use(rn);
         }
         &Inst::UnaryRR { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::CmpRR { rn, rm, .. } => {
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::CmpRX { rn, ref mem, .. } => {
-            collector.add_use(rn);
-            memarg_regs(mem, collector);
+            collector.reg_use(rn);
+            memarg_operands(mem, collector);
         }
         &Inst::CmpRSImm16 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpRSImm32 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpRUImm32 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpTrapRR { rn, rm, .. } => {
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::CmpTrapRSImm16 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpTrapRUImm16 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::AtomicRmw {
             rd, rn, ref mem, ..
         } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            memarg_operands(mem, collector);
         }
         &Inst::AtomicCas32 {
             rd, rn, ref mem, ..
@@ -493,9 +482,9 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::AtomicCas64 {
             rd, rn, ref mem, ..
         } => {
-            collector.add_mod(rd);
-            collector.add_use(rn);
-            memarg_regs(mem, collector);
+            collector.reg_mod(rd);
+            collector.reg_use(rn);
+            memarg_operands(mem, collector);
         }
         &Inst::Fence => {}
         &Inst::Load32 { rd, ref mem, .. }
@@ -513,8 +502,8 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::LoadRev16 { rd, ref mem, .. }
         | &Inst::LoadRev32 { rd, ref mem, .. }
         | &Inst::LoadRev64 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::Store8 { rd, ref mem, .. }
         | &Inst::Store16 { rd, ref mem, .. }
@@ -523,42 +512,42 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::StoreRev16 { rd, ref mem, .. }
         | &Inst::StoreRev32 { rd, ref mem, .. }
         | &Inst::StoreRev64 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::StoreImm8 { ref mem, .. }
         | &Inst::StoreImm16 { ref mem, .. }
         | &Inst::StoreImm32SExt16 { ref mem, .. }
         | &Inst::StoreImm64SExt16 { ref mem, .. } => {
-            memarg_regs(mem, collector);
+            memarg_operands(mem, collector);
         }
         &Inst::LoadMultiple64 {
             rt, rt2, ref mem, ..
         } => {
-            let first_regnum = rt.to_reg().get_hw_encoding();
-            let last_regnum = rt2.to_reg().get_hw_encoding();
+            memarg_operands(mem, collector);
+            let first_regnum = rt.to_reg().to_real_reg().unwrap().hw_enc();
+            let last_regnum = rt2.to_reg().to_real_reg().unwrap().hw_enc();
             for regnum in first_regnum..last_regnum + 1 {
-                collector.add_def(writable_gpr(regnum));
+                collector.reg_def(writable_gpr(regnum));
             }
-            memarg_regs(mem, collector);
         }
         &Inst::StoreMultiple64 {
             rt, rt2, ref mem, ..
         } => {
-            let first_regnum = rt.get_hw_encoding();
-            let last_regnum = rt2.get_hw_encoding();
+            memarg_operands(mem, collector);
+            let first_regnum = rt.to_real_reg().unwrap().hw_enc();
+            let last_regnum = rt2.to_real_reg().unwrap().hw_enc();
             for regnum in first_regnum..last_regnum + 1 {
-                collector.add_use(gpr(regnum));
+                collector.reg_use(gpr(regnum));
             }
-            memarg_regs(mem, collector);
         }
         &Inst::Mov64 { rd, rm } => {
-            collector.add_def(rd);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rm);
         }
         &Inst::Mov32 { rd, rm } => {
-            collector.add_def(rd);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rm);
         }
         &Inst::Mov32Imm { rd, .. }
         | &Inst::Mov32SImm16 { rd, .. }
@@ -566,123 +555,126 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::Mov64SImm32 { rd, .. }
         | &Inst::Mov64UImm16Shifted { rd, .. }
         | &Inst::Mov64UImm32Shifted { rd, .. } => {
-            collector.add_def(rd);
+            collector.reg_def(rd);
         }
         &Inst::CMov32 { rd, rm, .. } | &Inst::CMov64 { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::CMov32SImm16 { rd, .. } | &Inst::CMov64SImm16 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::Insert64UImm16Shifted { rd, .. } | &Inst::Insert64UImm32Shifted { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::FpuMove32 { rd, rn } | &Inst::FpuMove64 { rd, rn } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuCMov32 { rd, rm, .. } | &Inst::FpuCMov64 { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::MovToFpr { rd, rn } | &Inst::MovFromFpr { rd, rn } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
        }
         &Inst::FpuRR { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuRRR { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::FpuRRRR { rd, rn, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::FpuCopysign { rd, rn, rm, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::FpuLoad32 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuLoad64 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStore32 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStore64 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuLoadRev32 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuLoadRev64 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStoreRev32 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStoreRev64 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => {
-            collector.add_def(rd);
+            collector.reg_def(rd);
         }
         &Inst::FpuToInt { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::IntToFpu { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuRound { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuVecRRR { rd, rn, rm, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::Extend { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::Call { link, ref info } => {
-            collector.add_def(link);
-            collector.add_uses(&*info.uses);
-            collector.add_defs(&*info.defs);
+            collector.reg_def(link);
+            collector.reg_uses(&*info.uses);
+            collector.reg_defs(&*info.defs);
         }
         &Inst::CallInd { link, ref info } => {
-            collector.add_def(link);
-            collector.add_uses(&*info.uses);
-            collector.add_defs(&*info.defs);
-            collector.add_use(info.rn);
+            collector.reg_def(link);
+            collector.reg_use(info.rn);
+            collector.reg_uses(&*info.uses);
+            collector.reg_defs(&*info.defs);
+        }
+        &Inst::Ret { link, ref rets } => {
+            collector.reg_use(link);
+            collector.reg_uses(&rets[..]);
         }
-        &Inst::Ret { .. } => {}
         &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {}
         &Inst::IndirectBr { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CondBr { .. } | &Inst::OneWayCondBr { .. } => {}
         &Inst::Nop0 | Inst::Nop2 => {}
} => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::IntToFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuRound { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuVecRRR { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::Extend { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::Call { link, ref info } => { - collector.add_def(link); - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); + collector.reg_def(link); + collector.reg_uses(&*info.uses); + collector.reg_defs(&*info.defs); } &Inst::CallInd { link, ref info } => { - collector.add_def(link); - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); - collector.add_use(info.rn); + collector.reg_def(link); + collector.reg_use(info.rn); + collector.reg_uses(&*info.uses); + collector.reg_defs(&*info.defs); + } + &Inst::Ret { link, ref rets } => { + collector.reg_use(link); + collector.reg_uses(&rets[..]); } - &Inst::Ret { .. } => {} &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {} &Inst::IndirectBr { rn, .. } => { - collector.add_use(rn); + collector.reg_use(rn); } &Inst::CondBr { .. } | &Inst::OneWayCondBr { .. } => {} &Inst::Nop0 | &Inst::Nop2 => {} @@ -690,769 +682,26 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::Trap { .. } => {} &Inst::TrapIf { .. } => {} &Inst::JTSequence { ridx, .. } => { - collector.add_use(ridx); + collector.reg_use(ridx); } &Inst::LoadExtNameFar { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::LoadAddr { rd, ref mem } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::Loop { ref body, .. } => { for inst in body.iter() { - s390x_get_regs(inst, collector); + s390x_get_operands(inst, collector); } } &Inst::CondBreak { .. } => {} &Inst::VirtualSPOffsetAdj { .. } => {} - &Inst::ValueLabelMarker { reg, .. } => { - collector.add_use(reg); - } &Inst::Unwind { .. } => {} - } -} - -//============================================================================= -// Instructions: map_regs - -pub fn s390x_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) { - fn map_mem<RM: RegMapper>(m: &RM, mem: &mut MemArg) { - match mem { - &mut MemArg::BXD12 { - ref mut base, - ref mut index, - .. - } - | &mut MemArg::BXD20 { - ref mut base, - ref mut index, - .. - } => { - if *base != zero_reg() { - m.map_use(base); - } - if *index != zero_reg() { - m.map_use(index); - } - } - &mut MemArg::Label { .. } | &mut MemArg::Symbol { .. } => {} - &mut MemArg::RegOffset { ref mut reg, .. } => m.map_use(reg), - &mut MemArg::InitialSPOffset { .. } | &mut MemArg::NominalSPOffset { .. } => {} - }; - } - - match inst { - &mut Inst::AluRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); + &Inst::DummyUse { reg } => { + collector.reg_use(reg); } - &mut Inst::AluRRSImm16 { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRX { - ref mut rd, - ref mut mem, - ..
- } => { - mapper.map_mod(rd); - map_mem(mapper, mem); - } - &mut Inst::AluRR { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::AluRSImm16 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRSImm32 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRUImm32 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRUImm16Shifted { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRUImm32Shifted { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::SMulWide { - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::UMulWide { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::SDivMod32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::SDivMod64 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::UDivMod32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::UDivMod64 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::Flogr { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::ShiftRR { - ref mut rd, - ref mut rn, - ref mut shift_reg, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - if *shift_reg != zero_reg() { - mapper.map_use(shift_reg); - } - } - &mut Inst::RxSBG { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - } - &mut Inst::RxSBGTest { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_use(rd); - mapper.map_use(rn); - } - &mut Inst::UnaryRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::CmpRR { - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::CmpRX { - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::CmpRSImm16 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpRSImm32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpRUImm32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpTrapRR { - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::CmpTrapRSImm16 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpTrapRUImm16 { ref mut rn, .. } => { - mapper.map_use(rn); - } - - &mut Inst::AtomicRmw { - ref mut rd, - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::AtomicCas32 { - ref mut rd, - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::AtomicCas64 { - ref mut rd, - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::Fence => {} - - &mut Inst::Load32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32ZExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32SExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32ZExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32SExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64 { - ref mut rd, - ref mut mem, - .. 
- } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64ZExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64SExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64ZExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64SExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64ZExt32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64SExt32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadRev16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - - &mut Inst::Store8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::StoreImm8 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreImm16 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreImm32SExt16 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreImm64SExt16 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreRev16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::StoreRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::StoreRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadMultiple64 { .. } => { - // This instruction accesses all registers between rt and rt2, - // so it cannot be remapped. But this does not matter since - // the instruction is only ever used after register allocation. - unreachable!(); - } - &mut Inst::StoreMultiple64 { .. } => { - // This instruction accesses all registers between rt and rt2, - // so it cannot be remapped. But this does not matter since - // the instruction is only ever used after register allocation. - unreachable!(); - } - - &mut Inst::Mov64 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::Mov32 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::Mov32Imm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov32SImm16 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64SImm16 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64SImm32 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64UImm16Shifted { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64UImm32Shifted { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Insert64UImm16Shifted { ref mut rd, .. 
} => { - mapper.map_mod(rd); - } - &mut Inst::Insert64UImm32Shifted { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::CMov64 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::CMov32 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::CMov32SImm16 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::CMov64SImm16 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::FpuMove32 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMove64 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuCMov64 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::FpuCMov32 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::MovToFpr { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::MovFromFpr { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRRR { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::FpuRRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCopysign { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp32 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp64 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuLoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoad64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoadRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoadRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStoreRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStoreRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadFpuConst32 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::FpuToInt { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::IntToFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRound { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuVecRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. 
- } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::Extend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::Call { - ref mut link, - ref mut info, - } => { - mapper.map_def(link); - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - } - &mut Inst::CallInd { - ref mut link, - ref mut info, - .. - } => { - mapper.map_def(link); - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - mapper.map_use(&mut info.rn); - } - &mut Inst::Ret { .. } => {} - &mut Inst::EpiloguePlaceholder => {} - &mut Inst::Jump { .. } => {} - &mut Inst::IndirectBr { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CondBr { .. } | &mut Inst::OneWayCondBr { .. } => {} - &mut Inst::Debugtrap | &mut Inst::Trap { .. } | &mut Inst::TrapIf { .. } => {} - &mut Inst::Nop0 | &mut Inst::Nop2 => {} - &mut Inst::JTSequence { ref mut ridx, .. } => { - mapper.map_use(ridx); - } - &mut Inst::LoadExtNameFar { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadAddr { - ref mut rd, - ref mut mem, - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Loop { ref mut body, .. } => { - for inst in body.iter_mut() { - s390x_map_regs(inst, mapper); - } - } - &mut Inst::CondBreak { .. } => {} - &mut Inst::VirtualSPOffsetAdj { .. } => {} - &mut Inst::ValueLabelMarker { ref mut reg, .. } => { - mapper.map_use(reg); - } - &mut Inst::Unwind { .. } => {} } } @@ -1462,12 +711,8 @@ pub fn s390x_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) { impl MachInst for Inst { type LabelUse = LabelUse; - fn get_regs(&self, collector: &mut RegUsageCollector) { - s390x_get_regs(self, collector) - } - - fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) { - s390x_map_regs(self, mapper); + fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) { + s390x_get_operands(self, collector); } fn is_move(&self) -> Option<(Writable<Reg>, Reg)> { @@ -1505,18 +750,16 @@ impl MachInst for Inst { } } - fn stack_op_info(&self) -> Option<MachInstStackOpInfo> { + fn is_safepoint(&self) -> bool { match self { - &Inst::VirtualSPOffsetAdj { offset } => Some(MachInstStackOpInfo::NomSPAdj(offset)), - &Inst::Store64 { - rd, - mem: MemArg::NominalSPOffset { off }, - } => Some(MachInstStackOpInfo::StoreNomSPOff(rd, off)), - &Inst::Load64 { - rd, - mem: MemArg::NominalSPOffset { off }, - } => Some(MachInstStackOpInfo::LoadNomSPOff(rd.to_reg(), off)), - _ => None, + &Inst::Call { .. } + | &Inst::CallInd { .. } + | &Inst::Trap { .. } + | &Inst::TrapIf { .. } + | &Inst::CmpTrapRR { .. } + | &Inst::CmpTrapRSImm16 { .. } + | &Inst::CmpTrapRUImm16 { ..
} => true, + _ => false, } } @@ -1575,30 +818,26 @@ impl MachInst for Inst { } } - fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> { - None - } - fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { match ty { - types::I8 => Ok((&[RegClass::I64], &[types::I8])), - types::I16 => Ok((&[RegClass::I64], &[types::I16])), - types::I32 => Ok((&[RegClass::I64], &[types::I32])), - types::I64 => Ok((&[RegClass::I64], &[types::I64])), - types::B1 => Ok((&[RegClass::I64], &[types::B1])), - types::B8 => Ok((&[RegClass::I64], &[types::B8])), - types::B16 => Ok((&[RegClass::I64], &[types::B16])), - types::B32 => Ok((&[RegClass::I64], &[types::B32])), - types::B64 => Ok((&[RegClass::I64], &[types::B64])), + types::I8 => Ok((&[RegClass::Int], &[types::I8])), + types::I16 => Ok((&[RegClass::Int], &[types::I16])), + types::I32 => Ok((&[RegClass::Int], &[types::I32])), + types::I64 => Ok((&[RegClass::Int], &[types::I64])), + types::B1 => Ok((&[RegClass::Int], &[types::B1])), + types::B8 => Ok((&[RegClass::Int], &[types::B8])), + types::B16 => Ok((&[RegClass::Int], &[types::B16])), + types::B32 => Ok((&[RegClass::Int], &[types::B32])), + types::B64 => Ok((&[RegClass::Int], &[types::B64])), types::R32 => panic!("32-bit reftype pointer should never be seen on s390x"), - types::R64 => Ok((&[RegClass::I64], &[types::R64])), - types::F32 => Ok((&[RegClass::F64], &[types::F32])), - types::F64 => Ok((&[RegClass::F64], &[types::F64])), - types::I128 => Ok((&[RegClass::I64, RegClass::I64], &[types::I64, types::I64])), - types::B128 => Ok((&[RegClass::I64, RegClass::I64], &[types::B64, types::B64])), + types::R64 => Ok((&[RegClass::Int], &[types::R64])), + types::F32 => Ok((&[RegClass::Float], &[types::F32])), + types::F64 => Ok((&[RegClass::Float], &[types::F64])), + types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])), + types::B128 => Ok((&[RegClass::Int, RegClass::Int], &[types::B64, types::B64])), // FIXME: We don't really have IFLAGS, but need to allow it here // for now to support the SelectifSpectreGuard instruction.
- types::IFLAGS => Ok((&[RegClass::I64], &[types::I64])), + types::IFLAGS => Ok((&[RegClass::Int], &[types::I64])), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty @@ -1606,6 +845,13 @@ impl MachInst for Inst { } } + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + RegClass::Int => types::I64, + RegClass::Float => types::F64, + } + } + fn gen_jump(target: MachLabel) -> Inst { Inst::Jump { dest: target } } @@ -1622,18 +868,11 @@ impl MachInst for Inst { } fn ref_type_regclass(_: &settings::Flags) -> RegClass { - RegClass::I64 + RegClass::Int } - fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self { - Inst::ValueLabelMarker { label, reg } - } - - fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { - match self { - Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)), - _ => None, - } + fn gen_dummy_use(reg: Reg) -> Inst { + Inst::DummyUse { reg } } } @@ -1642,7 +881,6 @@ impl MachInst for Inst { fn mem_finalize_for_show( mem: &MemArg, - mb_rru: Option<&RealRegUniverse>, state: &EmitState, have_d12: bool, have_d20: bool, @@ -1652,7 +890,9 @@ fn mem_finalize_for_show( let (mem_insts, mem) = mem_finalize(mem, state, have_d12, have_d20, have_pcrel, have_index); let mut mem_str = mem_insts .into_iter() - .map(|inst| inst.show_rru(mb_rru)) + .map(|inst| { + inst.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])) + }) .collect::<Vec<_>>() .join(" ; "); if !mem_str.is_empty() { @@ -1662,18 +902,25 @@ fn mem_finalize_for_show( (mem_str, mem) } -impl PrettyPrint for Inst { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.pretty_print(mb_rru, &mut EmitState::default()) - } -} - impl Inst { - fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + fn print_with_state( + &self, + state: &mut EmitState, + allocs: &mut AllocationConsumer<'_>, + ) -> String { + // N.B.: order of consumption of `allocs` must match the order + // in `s390x_get_operands()`.
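// Editorial sketch (not part of this patch): to make the ordering invariant
// above concrete, a pretty-printer arm must pull from `allocs` in exactly the
// order the matching `s390x_get_operands()` arm collected operands; consuming
// out of order silently pairs every later operand with the wrong allocation.
// For a three-operand ALU op collected as def(rd), use(rn), use(rm), a
// hypothetical stand-alone helper would look like:
//
//     fn print_alu_rrr(
//         rd: Writable<Reg>,
//         rn: Reg,
//         rm: Reg,
//         allocs: &mut AllocationConsumer<'_>,
//     ) -> String {
//         let rd = allocs.next_writable(rd); // 1st: the def
//         let rn = allocs.next(rn);          // 2nd: first use
//         let rm = allocs.next(rm);          // 3rd: second use
//         format!("agrk {}, {}, {}", show_reg(rd.to_reg()), show_reg(rn), show_reg(rm))
//     }
//
// `AllocationConsumer::next`/`next_writable` and `show_reg` are the same
// helpers this patch uses below; only `print_alu_rrr` itself is invented.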
+ + let mut empty_allocs = AllocationConsumer::new(&[]); + match self { &Inst::Nop0 => "nop-zero-len".to_string(), &Inst::Nop2 => "nop".to_string(), &Inst::AluRRR { alu_op, rd, rn, rm } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let (op, have_rr) = match alu_op { ALUOp::Add32 => ("ark", true), ALUOp::Add64 => ("agrk", true), @@ -1701,11 +948,11 @@ impl Inst { }; if have_rr && rd.to_reg() == rn { let inst = Inst::AluRR { alu_op, rd, rm }; - return inst.print_with_state(mb_rru, state); + return inst.print_with_state(state, &mut empty_allocs); } - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), &mut empty_allocs); + let rn = pretty_print_reg(rn, &mut empty_allocs); + let rm = pretty_print_reg(rm, &mut empty_allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::AluRRSImm16 { @@ -1714,17 +961,20 @@ impl Inst { rn, imm, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + if rd.to_reg() == rn { let inst = Inst::AluRSImm16 { alu_op, rd, imm }; - return inst.print_with_state(mb_rru, state); + return inst.print_with_state(state, &mut empty_allocs); } let op = match alu_op { ALUOp::Add32 => "ahik", ALUOp::Add64 => "aghik", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), &mut empty_allocs); + let rn = pretty_print_reg(rn, &mut empty_allocs); format!("{} {}, {}, {}", op, rd, rn, imm) } &Inst::AluRR { alu_op, rd, rm } => { @@ -1752,8 +1002,8 @@ impl Inst { ALUOp::Xor64 => "xgr", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}", op, rd, rm) } &Inst::AluRX { @@ -1792,24 +1042,23 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), false, true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. 
} => opcode_rxy, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rd = rd.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) } &Inst::AluRSImm16 { alu_op, rd, imm } => { @@ -1820,7 +1069,7 @@ impl Inst { ALUOp::Mul64 => "mghi", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm) } &Inst::AluRSImm32 { alu_op, rd, imm } => { @@ -1831,7 +1080,7 @@ impl Inst { ALUOp::Mul64 => "msgfi", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm) } &Inst::AluRUImm32 { alu_op, rd, imm } => { @@ -1842,7 +1091,7 @@ impl Inst { ALUOp::SubLogical64 => "slgfi", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm) } &Inst::AluRUImm16Shifted { alu_op, rd, imm } => { @@ -1861,7 +1110,7 @@ impl Inst { (ALUOp::Orr64, 3) => "oihh", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm.bits) } &Inst::AluRUImm32Shifted { alu_op, rd, imm } => { @@ -1877,50 +1126,57 @@ impl Inst { (ALUOp::Xor64, 1) => "xihf", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm.bits) } &Inst::SMulWide { rn, rm } => { let op = "mgrk"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::UMulWide { rn } => { let op = "mlgr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::SDivMod32 { rn, .. } => { let op = "dsgfr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::SDivMod64 { rn, .. } => { let op = "dsgr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::UDivMod32 { rn, .. } => { let op = "dlr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::UDivMod64 { rn, .. 
} => { let op = "dlgr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::Flogr { rn } => { let op = "flogr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::ShiftRR { @@ -1940,10 +1196,10 @@ impl Inst { ShiftOp::AShR32 => "srak", ShiftOp::AShR64 => "srag", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); let shift_reg = if shift_reg != zero_reg() { - format!("({})", shift_reg.show_rru(mb_rru)) + format!("({})", pretty_print_reg(shift_reg, allocs)) } else { "".to_string() }; @@ -1963,8 +1219,8 @@ impl Inst { RxSBGOp::Or => "rosbg", RxSBGOp::Xor => "rxsbg", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!( "{} {}, {}, {}, {}, {}", op, @@ -1989,8 +1245,8 @@ impl Inst { RxSBGOp::Xor => "rxsbg", _ => unreachable!(), }; - let rd = rd.show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd, allocs); + let rn = pretty_print_reg(rn, allocs); format!( "{} {}, {}, {}, {}, {}", op, @@ -2014,8 +1270,8 @@ impl Inst { UnaryOp::BSwap32 => ("lrvr", ""), UnaryOp::BSwap64 => ("lrvgr", ""), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}{}", op, rd, rn, extra) } &Inst::CmpRR { op, rn, rm } => { @@ -2028,8 +1284,8 @@ impl Inst { CmpOp::CmpL64Ext32 => "clgfr", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}", op, rn, rm) } &Inst::CmpRX { op, rn, ref mem } => { @@ -2046,25 +1302,24 @@ impl Inst { CmpOp::CmpL64Ext32 => (None, Some("clgf"), Some("clgfrl")), }; + let rn = pretty_print_reg(rn, allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), opcode_ril.is_some(), true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, &MemArg::Label { .. } | &MemArg::Symbol { .. 
} => opcode_ril, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rn = rn.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op.unwrap(), rn, mem) } &Inst::CmpRSImm16 { op, rn, imm } => { @@ -2073,7 +1328,7 @@ impl Inst { CmpOp::CmpS64 => "cghi", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rn, imm) } &Inst::CmpRSImm32 { op, rn, imm } => { @@ -2082,7 +1337,7 @@ impl Inst { CmpOp::CmpS64 => "cgfi", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rn, imm) } &Inst::CmpRUImm32 { op, rn, imm } => { @@ -2091,7 +1346,7 @@ impl Inst { CmpOp::CmpL64 => "clgfi", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rn, imm) } &Inst::CmpTrapRR { @@ -2104,9 +1359,9 @@ impl Inst { CmpOp::CmpL64 => "clgrt", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.pretty_print_default(); format!("{}{} {}, {}", op, cond, rn, rm) } &Inst::CmpTrapRSImm16 { @@ -2117,8 +1372,8 @@ impl Inst { CmpOp::CmpS64 => "cgit", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let cond = cond.pretty_print_default(); format!("{}{} {}, {}", op, cond, rn, imm) } &Inst::CmpTrapRUImm16 { @@ -2129,8 +1384,8 @@ impl Inst { CmpOp::CmpL64 => "clgit", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let cond = cond.pretty_print_default(); format!("{}{} {}, {}", op, cond, rn, imm) } &Inst::AtomicRmw { @@ -2153,12 +1408,11 @@ impl Inst { _ => unreachable!(), }; - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); - - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, op, rd, rn, mem) } &Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => { @@ -2168,25 +1422,24 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rs.is_some(), opcode_rsy.is_some(), false, false, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rs, &MemArg::BXD20 { .. 
} => opcode_rsy, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}, {}", mem_str, op.unwrap(), rd, rn, mem) } &Inst::Fence => "bcr 14, 0".to_string(), @@ -2228,38 +1481,35 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), opcode_ril.is_some(), true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, _ => unreachable!(), }; - - let rd = rd.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) } &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); - + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); let op = match self { &Inst::FpuLoadRev32 { .. } => "vlebrf", &Inst::FpuLoadRev64 { .. } => "vlebrg", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); format!("{}{} {}, {}, 0", mem_str, op, rd, mem) } &Inst::Store8 { rd, ref mem } @@ -2284,111 +1534,110 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd, allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), opcode_ril.is_some(), true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rd = rd.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) } &Inst::StoreImm8 { imm, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, true, false, false); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, false, false); let op = match &mem { &MemArg::BXD12 { .. } => "mvi", &MemArg::BXD20 { .. } => "mviy", _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op, mem, imm) } &Inst::StoreImm16 { imm, ref mem } | &Inst::StoreImm32SExt16 { imm, ref mem } | &Inst::StoreImm64SExt16 { imm, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); let op = match self { &Inst::StoreImm16 { .. } => "mvhhi", &Inst::StoreImm32SExt16 { .. } => "mvhi", &Inst::StoreImm64SExt16 { .. 
} => "mvghi", _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op, mem, imm) } &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); - + let rd = pretty_print_reg(rd, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); let op = match self { &Inst::FpuStoreRev32 { .. } => "vstebrf", &Inst::FpuStoreRev64 { .. } => "vstebrg", _ => unreachable!(), }; - let rd = rd.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}, 0", mem_str, op, rd, mem) } &Inst::LoadMultiple64 { rt, rt2, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); - let rt = rt.show_rru(mb_rru); - let rt2 = rt2.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let rt = pretty_print_reg(rt.to_reg(), &mut empty_allocs); + let rt2 = pretty_print_reg(rt2.to_reg(), &mut empty_allocs); + let mem = mem.pretty_print_default(); format!("{}lmg {}, {}, {}", mem_str, rt, rt2, mem) } &Inst::StoreMultiple64 { rt, rt2, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); - let rt = rt.show_rru(mb_rru); - let rt2 = rt2.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let rt = pretty_print_reg(rt, &mut empty_allocs); + let rt2 = pretty_print_reg(rt2, &mut empty_allocs); + let mem = mem.pretty_print_default(); format!("{}stmg {}, {}, {}", mem_str, rt, rt2, mem) } &Inst::Mov64 { rd, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("lgr {}, {}", rd, rm) } &Inst::Mov32 { rd, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("lr {}, {}", rd, rm) } &Inst::Mov32Imm { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("iilf {}, {}", rd, imm) } &Inst::Mov32SImm16 { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("lhi {}, {}", rd, imm) } &Inst::Mov64SImm16 { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("lghi {}, {}", rd, imm) } &Inst::Mov64SImm32 { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("lgfi {}, {}", rd, imm) } &Inst::Mov64UImm16Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "llill", 1 => "llilh", @@ -2399,7 +1648,7 @@ impl Inst { format!("{} {}, {}", op, rd, imm.bits) } &Inst::Mov64UImm32Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "llilf", 1 => "llihf", @@ -2408,7 +1657,7 @@ impl Inst { format!("{} {}, 
{}", op, rd, imm.bits) } &Inst::Insert64UImm16Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "iill", 1 => "iilh", @@ -2419,7 +1668,7 @@ impl Inst { format!("{} {}, {}", op, rd, imm.bits) } &Inst::Insert64UImm32Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "iilf", 1 => "iihf", @@ -2428,57 +1677,57 @@ impl Inst { format!("{} {}, {}", op, rd, imm.bits) } &Inst::CMov32 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.pretty_print_default(); format!("locr{} {}, {}", cond, rd, rm) } &Inst::CMov64 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.pretty_print_default(); format!("locgr{} {}, {}", cond, rd, rm) } &Inst::CMov32SImm16 { rd, cond, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let cond = cond.pretty_print_default(); format!("lochi{} {}, {}", cond, rd, imm) } &Inst::CMov64SImm16 { rd, cond, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let cond = cond.pretty_print_default(); format!("locghi{} {}, {}", cond, rd, imm) } &Inst::FpuMove32 { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("ler {}, {}", rd, rn) } &Inst::FpuMove64 { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("ldr {}, {}", rd, rn) } &Inst::FpuCMov32 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.invert().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.invert().pretty_print_default(); format!("j{} 6 ; ler {}, {}", cond, rd, rm) } &Inst::FpuCMov64 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.invert().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.invert().pretty_print_default(); format!("j{} 6 ; ldr {}, {}", cond, rd, rm) } &Inst::MovToFpr { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("ldgr {}, {}", rd, rn) } &Inst::MovFromFpr { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("lgdr {}, {}", rd, rn) } &Inst::FpuRR { fpu_op, rd, rn } => { @@ -2494,8 +1743,8 @@ impl Inst { FPUOp1::Cvt32To64 => "ldebr", FPUOp1::Cvt64To32 => "ledbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = 
pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuRRR { fpu_op, rd, rm } => { @@ -2510,8 +1759,8 @@ impl Inst { FPUOp2::Div64 => "ddbr", _ => unimplemented!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}", op, rd, rm) } &Inst::FpuRRRR { fpu_op, rd, rn, rm } => { @@ -2521,30 +1770,30 @@ impl Inst { FPUOp3::MSub32 => "msebr", FPUOp3::MSub64 => "msdbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::FpuCopysign { rd, rn, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("cpsdr {}, {}, {}", rd, rm, rn) } &Inst::FpuCmp32 { rn, rm } => { - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("cebr {}, {}", rn, rm) } &Inst::FpuCmp64 { rn, rm } => { - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("cdbr {}, {}", rn, rm) } &Inst::LoadFpuConst32 { rd, const_data } => { - let rd = rd.to_reg().show_rru(mb_rru); - let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); format!( "bras {}, 8 ; data.f32 {} ; le {}, 0({})", tmp, @@ -2554,8 +1803,8 @@ impl Inst { ) } &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = rd.to_reg().show_rru(mb_rru); - let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); format!( "bras {}, 12 ; data.f64 {} ; ld {}, 0({})", tmp, @@ -2575,8 +1824,8 @@ impl Inst { FpuToIntOp::F64ToI64 => "cgdbra", FpuToIntOp::F64ToU64 => "clgdbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, 5, {}, 0", op, rd, rn) } &Inst::IntToFpu { op, rd, rn } => { @@ -2590,8 +1839,8 @@ impl Inst { IntToFpuOp::I64ToF64 => "cdgbra", IntToFpuOp::U64ToF64 => "cdlgbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, 0, {}, 0", op, rd, rn) } &Inst::FpuRound { op, rd, rn } => { @@ -2605,8 +1854,8 @@ impl Inst { FpuRoundMode::Nearest32 => ("fiebr", 4), FpuRoundMode::Nearest64 => ("fidbr", 4), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}, {}", op, rd, rn, m3) } &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => { @@ -2617,9 +1866,9 @@ impl Inst { FPUOp2::Min64 => "wfmindb", _ => unimplemented!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = 
rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}, {}, 1", op, rd, rn, rm) } &Inst::Extend { @@ -2629,8 +1878,8 @@ impl Inst { from_bits, to_bits, } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); let op = match (signed, from_bits, to_bits) { (_, 1, 32) => "llcr", (_, 1, 64) => "llgcr", @@ -2649,16 +1898,16 @@ impl Inst { format!("{} {}, {}", op, rd, rn) } &Inst::Call { link, ref info, .. } => { - let link = link.show_rru(mb_rru); + let link = pretty_print_reg(link.to_reg(), allocs); format!("brasl {}, {}", link, info.dest) } &Inst::CallInd { link, ref info, .. } => { - let link = link.show_rru(mb_rru); - let rn = info.rn.show_rru(mb_rru); + let link = pretty_print_reg(link.to_reg(), allocs); + let rn = pretty_print_reg(info.rn, allocs); format!("basr {}, {}", link, rn) } - &Inst::Ret { link } => { - let link = link.show_rru(mb_rru); + &Inst::Ret { link, .. } => { + let link = pretty_print_reg(link, allocs); format!("br {}", link) } &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), @@ -2667,7 +1916,7 @@ impl Inst { format!("jg {}", dest) } &Inst::IndirectBr { rn, .. } => { - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("br {}", rn) } &Inst::CondBr { @@ -2677,23 +1926,23 @@ impl Inst { } => { let taken = taken.to_string(); let not_taken = not_taken.to_string(); - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("jg{} {} ; jg {}", cond, taken, not_taken) } &Inst::OneWayCondBr { target, cond } => { let target = target.to_string(); - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("jg{} {}", cond, target) } &Inst::Debugtrap => "debugtrap".to_string(), &Inst::Trap { .. } => "trap".to_string(), &Inst::TrapIf { cond, .. } => { - let cond = cond.invert().show_rru(mb_rru); + let cond = cond.invert().pretty_print_default(); format!("j{} 6 ; trap", cond) } &Inst::JTSequence { ridx, ref targets } => { - let ridx = ridx.show_rru(mb_rru); - let rtmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let ridx = pretty_print_reg(ridx, allocs); + let rtmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); // The first entry is the default target, which is not emitted // into the jump table, so we skip it here. It is only in the // list so MachTerminator will see the potential target. @@ -2717,50 +1966,51 @@ impl Inst { ref name, offset, } => { - let rd = rd.show_rru(mb_rru); - let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); format!( "bras {}, 12 ; data {} + {} ; lg {}, 0({})", tmp, name, offset, rd, tmp ) } &Inst::LoadAddr { rd, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, true, true, true); - + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, true, true); let op = match &mem { &MemArg::BXD12 { .. } => "la", &MemArg::BXD20 { .. } => "lay", &MemArg::Label { .. } | &MemArg::Symbol { .. 
} => "larl", _ => unreachable!(), }; - let rd = rd.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::Loop { ref body, cond } => { let body = body .into_iter() - .map(|inst| inst.show_rru(mb_rru)) + .map(|inst| inst.print_with_state(state, allocs)) .collect::<Vec<_>>() .join(" ; "); - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("0: {} ; jg{} 0b ; 1:", body, cond) } &Inst::CondBreak { cond } => { - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("jg{} 1f", cond) } &Inst::VirtualSPOffsetAdj { offset } => { state.virtual_sp_offset += offset; format!("virtual_sp_offset_adjust {}", offset) } - &Inst::ValueLabelMarker { label, reg } => { - format!("value_label {:?}, {}", label, reg.show_rru(mb_rru)) - } &Inst::Unwind { ref inst } => { format!("unwind {:?}", inst) } + &Inst::DummyUse { reg } => { + let reg = pretty_print_reg(reg, allocs); + format!("dummy_use {}", reg) + } } } } diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs index 3ebba43bda..179e8bdc9f 100644 --- a/cranelift/codegen/src/isa/s390x/inst/regs.rs +++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs @@ -1,7 +1,12 @@ //! S390x ISA definitions: registers. +use alloc::string::String; +use regalloc2::MachineEnv; +use regalloc2::PReg; +use regalloc2::VReg; + +use crate::machinst::*; use crate::settings; -use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES}; //============================================================================= // Registers, the Universe thereof, and printing @@ -29,11 +34,8 @@ const FPR_INDICES: [u8; 16] = [ /// Get a reference to a GPR (integer register). pub fn gpr(num: u8) -> Reg { assert!(num < 16); - Reg::new_real( - RegClass::I64, - /* enc = */ num, - /* index = */ GPR_INDICES[num as usize], - ) + let preg = PReg::new(num as usize, RegClass::Int); + Reg::from(VReg::new(preg.index(), RegClass::Int)) } /// Get a writable reference to a GPR. @@ -44,11 +46,8 @@ pub fn writable_gpr(num: u8) -> Writable<Reg> { /// Get a reference to a FPR (floating-point register). pub fn fpr(num: u8) -> Reg { assert!(num < 16); - Reg::new_real( - RegClass::F64, - /* enc = */ num, - /* index = */ FPR_INDICES[num as usize], - ) + let preg = PReg::new(num as usize, RegClass::Float); + Reg::from(VReg::new(preg.index(), RegClass::Float)) } /// Get a writable reference to a V-register. @@ -88,81 +87,73 @@ pub fn zero_reg() -> Reg { } /// Create the register universe for AArch64. -pub fn create_reg_universe(_flags: &settings::Flags) -> RealRegUniverse { - let mut regs = vec![]; - let mut allocable_by_class = [None; NUM_REG_CLASSES]; - - // Numbering Scheme: we put FPRs first, then GPRs. The GPRs exclude several registers: - // r0 (we cannot use this for addressing // FIXME regalloc) - // r1 (spilltmp) - // r15 (stack pointer) - - // FPRs.
- let mut base = regs.len(); - regs.push((fpr(0).to_real_reg(), "%f0".into())); - regs.push((fpr(2).to_real_reg(), "%f2".into())); - regs.push((fpr(4).to_real_reg(), "%f4".into())); - regs.push((fpr(6).to_real_reg(), "%f6".into())); - regs.push((fpr(1).to_real_reg(), "%f1".into())); - regs.push((fpr(3).to_real_reg(), "%f3".into())); - regs.push((fpr(5).to_real_reg(), "%f5".into())); - regs.push((fpr(7).to_real_reg(), "%f7".into())); - regs.push((fpr(8).to_real_reg(), "%f8".into())); - regs.push((fpr(10).to_real_reg(), "%f10".into())); - regs.push((fpr(12).to_real_reg(), "%f12".into())); - regs.push((fpr(14).to_real_reg(), "%f14".into())); - regs.push((fpr(9).to_real_reg(), "%f9".into())); - regs.push((fpr(11).to_real_reg(), "%f11".into())); - regs.push((fpr(13).to_real_reg(), "%f13".into())); - regs.push((fpr(15).to_real_reg(), "%f15".into())); - - allocable_by_class[RegClass::F64.rc_to_usize()] = Some(RegClassInfo { - first: base, - last: regs.len() - 1, - suggested_scratch: Some(fpr(1).get_index()), - }); - - // Caller-saved GPRs in the SystemV s390x ABI. - base = regs.len(); - regs.push((gpr(2).to_real_reg(), "%r2".into())); - regs.push((gpr(3).to_real_reg(), "%r3".into())); - regs.push((gpr(4).to_real_reg(), "%r4".into())); - regs.push((gpr(5).to_real_reg(), "%r5".into())); - - // Callee-saved GPRs in the SystemV s390x ABI. - // We start from r14 downwards in an attempt to allow the - // prolog to use as short a STMG as possible. - regs.push((gpr(14).to_real_reg(), "%r14".into())); - regs.push((gpr(13).to_real_reg(), "%r13".into())); - regs.push((gpr(12).to_real_reg(), "%r12".into())); - regs.push((gpr(11).to_real_reg(), "%r11".into())); - regs.push((gpr(10).to_real_reg(), "%r10".into())); - regs.push((gpr(9).to_real_reg(), "%r9".into())); - regs.push((gpr(8).to_real_reg(), "%r8".into())); - regs.push((gpr(7).to_real_reg(), "%r7".into())); - regs.push((gpr(6).to_real_reg(), "%r6".into())); - - allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { - first: base, - last: regs.len() - 1, - suggested_scratch: Some(gpr(13).get_index()), - }); - - // Other regs, not available to the allocator. - let allocable = regs.len(); - regs.push((gpr(15).to_real_reg(), "%r15".into())); - regs.push((gpr(0).to_real_reg(), "%r0".into())); - regs.push((gpr(1).to_real_reg(), "%r1".into())); - - // Assert sanity: the indices in the register structs must match their - // actual indices in the array. - for (i, reg) in regs.iter().enumerate() { - assert_eq!(i, reg.0.get_index()); +pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv { + fn preg(r: Reg) -> PReg { + r.to_real_reg().unwrap().into() } - RealRegUniverse { - regs, - allocable, - allocable_by_class, + MachineEnv { + preferred_regs_by_class: [ + vec![ + // no r0; can't use for addressing? + // no r1; it is our spilltmp. + preg(gpr(2)), + preg(gpr(3)), + preg(gpr(4)), + preg(gpr(5)), + ], + vec![ + preg(fpr(0)), + preg(fpr(1)), + preg(fpr(2)), + preg(fpr(3)), + preg(fpr(4)), + preg(fpr(5)), + preg(fpr(6)), + preg(fpr(7)), + ], + ], + non_preferred_regs_by_class: [ + vec![ + preg(gpr(6)), + preg(gpr(7)), + preg(gpr(8)), + preg(gpr(9)), + preg(gpr(10)), + preg(gpr(11)), + preg(gpr(12)), + // no r13; it is our scratch reg. + preg(gpr(14)), + // no r15; it is the stack pointer. + ], + vec![ + preg(fpr(8)), + preg(fpr(9)), + preg(fpr(10)), + preg(fpr(11)), + preg(fpr(12)), + preg(fpr(13)), + preg(fpr(14)), + // no f15; it is our scratch reg. 
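                // Editorial sketch (not part of this patch): regalloc2 draws from
                // `preferred_regs_by_class` first (the caller-saved r2-r5 and f0-f7
                // above, which cost nothing in the prologue), falls back to these
                // callee-saved `non_preferred` registers only under pressure, and
                // reserves `scratch_by_class` for its own move resolution. A caller
                // is assumed to hand this env to the allocator roughly as:
                //
                //     let env = create_machine_env(&flags);
                //     let output = regalloc2::run(&vcode, &env)?;
                //
                // where `vcode` implements `regalloc2::Function`; the exact `run`
                // signature is assumed from the regalloc2 0.1 API.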
+ ], + ], + scratch_by_class: [preg(gpr(13)), preg(fpr(15))], + fixed_stack_slots: vec![], } } + +pub fn show_reg(reg: Reg) -> String { + if let Some(rreg) = reg.to_real_reg() { + match rreg.class() { + RegClass::Int => format!("%r{}", rreg.hw_enc()), + RegClass::Float => format!("%f{}", rreg.hw_enc()), + } + } else { + format!("%{:?}", reg) + } +} + +pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String { + let reg = allocs.next(reg); + show_reg(reg) +} diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs index a970c59c69..8abf6c84a2 100644 --- a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -1,8 +1,8 @@ //! Unwind information for System V ABI (s390x). use crate::isa::unwind::systemv::RegisterMappingError; +use crate::machinst::{Reg, RegClass}; use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; -use regalloc::{Reg, RegClass}; /// Creates a new s390x common information entry (CIE). pub fn create_cie() -> CommonInformationEntry { @@ -64,10 +64,9 @@ pub fn map_reg(reg: Reg) -> Result { Register(31), ]; - match reg.get_class() { - RegClass::I64 => Ok(GPR_MAP[reg.get_hw_encoding() as usize]), - RegClass::F64 => Ok(FPR_MAP[reg.get_hw_encoding() as usize]), - _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")), + match reg.class() { + RegClass::Int => Ok(GPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), + RegClass::Float => Ok(FPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), } } diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index d60f1e4e63..54ced09644 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -2137,31 +2137,31 @@ ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trap trap_code)) - (safepoint (trap_impl trap_code))) + (side_effect (trap_impl trap_code))) ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trap trap_code)) - (safepoint (trap_impl trap_code))) + (side_effect (trap_impl trap_code))) ;;;; Rules for `trapz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trapz val trap_code)) - (safepoint (trap_if_bool (invert_bool (value_nonzero val)) trap_code))) + (side_effect (trap_if_bool (invert_bool (value_nonzero val)) trap_code))) ;;;; Rules for `trapnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trapnz val trap_code)) - (safepoint (trap_if_bool (value_nonzero val) trap_code))) + (side_effect (trap_if_bool (value_nonzero val) trap_code))) ;;;; Rules for `resumable_trapnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trapnz val trap_code)) - (safepoint (trap_if_bool (value_nonzero val) trap_code))) + (side_effect (trap_if_bool (value_nonzero val) trap_code))) ;;;; Rules for `debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2179,7 +2179,7 @@ ;; the desired comparison here; there is no separate `ifcmp` lowering. (rule (lower (trapif int_cc (ifcmp x y) trap_code)) - (safepoint (trap_if_bool (icmp_val $false int_cc x y) trap_code))) + (side_effect (trap_if_bool (icmp_val $false int_cc x y) trap_code))) ;; Recognize the case of `iadd_ifcout` feeding into `trapif`. 
Note that ;; in the case, the `iadd_ifcout` is generated by a separate lowering diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index 6979e884fa..39c7531392 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -11,7 +11,6 @@ use crate::machinst::lower::*; use crate::machinst::*; use crate::settings::Flags; use crate::CodegenResult; -use regalloc::Reg; use smallvec::SmallVec; pub mod isle; diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index 0d7d1fecfa..a9424d88c9 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -14,9 +14,8 @@ use crate::settings::Flags; use crate::{ ir::{ condcodes::*, immediates::*, types::*, AtomicRmwOp, Endianness, Inst, InstructionData, - StackSlot, TrapCode, Value, ValueLabel, ValueList, + StackSlot, TrapCode, Value, ValueList, }, - isa::s390x::inst::s390x_map_regs, isa::unwind::UnwindInst, machinst::{InsnOutput, LowerCtx}, }; @@ -43,15 +42,9 @@ pub(crate) fn lower( where C: LowerCtx, { - lower_common( - lower_ctx, - flags, - isa_flags, - outputs, - inst, - |cx, insn| generated_code::constructor_lower(cx, insn), - s390x_map_regs, - ) + lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| { + generated_code::constructor_lower(cx, insn) + }) } /// The main entry point for branch lowering with ISLE. @@ -65,15 +58,9 @@ pub(crate) fn lower_branch( where C: LowerCtx, { - lower_common( - lower_ctx, - flags, - isa_flags, - &[], - branch, - |cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()), - s390x_map_regs, - ) + lower_common(lower_ctx, flags, isa_flags, &[], branch, |cx, insn| { + generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()) + }) } impl generated_code::Context for IsleContext<'_, C, Flags, IsaFlags, 6> @@ -523,11 +510,6 @@ where #[inline] fn emit(&mut self, inst: &MInst) -> Unit { - self.emitted_insts.push((inst.clone(), false)); - } - - #[inline] - fn emit_safepoint(&mut self, inst: &MInst) -> Unit { - self.emitted_insts.push((inst.clone(), true)); + self.lower_ctx.emit(inst.clone()); } } diff --git a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest index b2391d6887..aae4194934 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 443b34b797fc8ace -src/prelude.isle c0751050a11e2686 -src/isa/s390x/inst.isle d91a16074ab186a8 -src/isa/s390x/lower.isle 1cc5a12adc8c75f9 +src/prelude.isle afd037c4d91c875c +src/isa/s390x/inst.isle 8218bd9e8556446b +src/isa/s390x/lower.isle 6a8de81f8dc4e568 diff --git a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs index 1997c856df..5a9d17ab09 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs @@ -79,7 +79,6 @@ pub trait Context { fn def_inst(&mut self, arg0: Value) -> Option; fn offset32_to_u32(&mut self, arg0: Offset32) -> u32; fn emit(&mut self, arg0: &MInst) -> Unit; - fn emit_safepoint(&mut self, arg0: &MInst) -> Unit; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn trap_code_bad_conversion_to_integer(&mut self) -> 
TrapCode; @@ -155,13 +154,13 @@ pub trait Context { fn same_reg(&mut self, arg0: Reg, arg1: WritableReg) -> Option<()>; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 402. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 427. +/// Internal type ProducesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -169,7 +168,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 438. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 429. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -666,6 +665,7 @@ pub enum MInst { }, Ret { link: Reg, + rets: VecReg, }, EpiloguePlaceholder, Jump { @@ -715,9 +715,8 @@ pub enum MInst { VirtualSPOffsetAdj { offset: i64, }, - ValueLabelMarker { + DummyUse { reg: Reg, - label: ValueLabel, }, Unwind { inst: UnwindInst, @@ -959,7 +958,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 410. + // Rule at src/prelude.isle line 407. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -967,24 +966,6 @@ pub fn constructor_side_effect( return None; } -// Generated as internal constructor for term safepoint. -pub fn constructor_safepoint( - ctx: &mut C, - arg0: &SideEffectNoResult, -) -> Option { - let pattern0_0 = arg0; - if let &SideEffectNoResult::Inst { - inst: ref pattern1_0, - } = pattern0_0 - { - // Rule at src/prelude.isle line 416. - let expr0_0 = C::emit_safepoint(ctx, pattern1_0); - let expr1_0 = C::output_none(ctx); - return Some(expr1_0); - } - return None; -} - // Generated as internal constructor for term produces_flags_get_reg. pub fn constructor_produces_flags_get_reg( ctx: &mut C, @@ -996,7 +977,7 @@ pub fn constructor_produces_flags_get_reg( result: pattern1_1, } = pattern0_0 { - // Rule at src/prelude.isle line 454. + // Rule at src/prelude.isle line 445. return Some(pattern1_1); } return None; @@ -1013,7 +994,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 459. + // Rule at src/prelude.isle line 450. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1023,7 +1004,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 461. + // Rule at src/prelude.isle line 452. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1052,7 +1033,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 468. + // Rule at src/prelude.isle line 459. let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -1082,7 +1063,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 493. + // Rule at src/prelude.isle line 484. 
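
The `Ret` variant growing a `rets` field here (and the x64 `gen_ret(rets)` change further down) is a direct consequence of moving to explicit operands: regalloc2 has no built-in notion of registers that are implicitly live-out of a function, so return values must appear as uses on the return instruction itself. A hedged sketch of what the operand collection for such a `Ret` could look like (`reg_use` and the collector types are from this PR; pinning each value to its ABI register is elided):

    use crate::machinst::{OperandCollector, Reg};
    use regalloc2::VReg;

    fn ret_operands<F: Fn(VReg) -> VReg>(
        rets: &[Reg],
        collector: &mut OperandCollector<'_, F>,
    ) {
        for &r in rets {
            // Keep each return value live up to (and at) the ret itself.
            // The real backend additionally constrains each one to its
            // ABI-assigned register.
            collector.reg_use(r);
        }
    }
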
let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -1093,7 +1074,7 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 499. + // Rule at src/prelude.isle line 490. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1106,7 +1087,7 @@ pub fn constructor_with_flags( inst4: ref pattern3_3, result: pattern3_4, } => { - // Rule at src/prelude.isle line 511. + // Rule at src/prelude.isle line 502. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1127,7 +1108,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 487. + // Rule at src/prelude.isle line 478. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1147,7 +1128,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 528. + // Rule at src/prelude.isle line 519. let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -8098,13 +8079,13 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { // Rule at src/isa/s390x/lower.isle line 2139. let expr0_0 = constructor_trap_impl(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } &Opcode::ResumableTrap => { // Rule at src/isa/s390x/lower.isle line 2145. let expr0_0 = constructor_trap_impl(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } _ => {} @@ -8358,7 +8339,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { @@ -8391,21 +8372,21 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { // Rule at src/isa/s390x/lower.isle line 2157. let expr0_0 = constructor_value_nonzero(ctx, pattern2_1)?; let expr1_0 = constructor_trap_if_bool(ctx, &expr0_0, pattern2_2)?; - let expr2_0 = constructor_safepoint(ctx, &expr1_0)?; + let expr2_0 = constructor_side_effect(ctx, &expr1_0)?; return Some(expr2_0); } &Opcode::ResumableTrapnz => { // Rule at src/isa/s390x/lower.isle line 2163. 
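
These `safepoint` → `side_effect` rewrites (and the deleted `constructor_safepoint` above) reflect a real change in division of labor: regalloc.rs needed lowering to tag safepoint instructions, via `emit_safepoint`, so it knew where to generate stackmaps, whereas regalloc2 derives safepoints itself from reference-typed vregs. The net effect on a trap lowering, as a sketch (`Option<InstOutput>` is an assumption for the generated constructors' result type, since the generated signatures lost their type parameters here):

    fn lower_trap<C: Context>(ctx: &mut C, trap_code: TrapCode) -> Option<InstOutput> {
        // Same shape as the generated rules above; no safepoint marker is
        // needed, because regalloc2 computes safepoints on its own.
        let trap = constructor_trap_impl(ctx, trap_code)?;
        constructor_side_effect(ctx, &trap)
    }
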
let expr0_0 = constructor_value_nonzero(ctx, pattern2_1)?; let expr1_0 = constructor_trap_if_bool(ctx, &expr0_0, pattern2_2)?; - let expr2_0 = constructor_safepoint(ctx, &expr1_0)?; + let expr2_0 = constructor_side_effect(ctx, &expr1_0)?; return Some(expr2_0); } _ => {} diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index e9325c6341..3e9674d6f2 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -7,15 +7,13 @@ use crate::isa::s390x::settings as s390x_settings; use crate::isa::unwind::systemv::RegisterMappingError; use crate::isa::{Builder as IsaBuilder, TargetIsa}; use crate::machinst::{ - compile, MachCompileResult, MachTextSectionBuilder, TextSectionBuilder, VCode, + compile, MachCompileResult, MachTextSectionBuilder, Reg, TextSectionBuilder, VCode, }; use crate::result::CodegenResult; use crate::settings as shared_settings; - use alloc::{boxed::Box, vec::Vec}; use core::fmt; - -use regalloc::{PrettyPrint, RealRegUniverse, Reg}; +use regalloc2::MachineEnv; use target_lexicon::{Architecture, Triple}; // New backend: @@ -24,7 +22,7 @@ pub(crate) mod inst; mod lower; mod settings; -use inst::create_reg_universe; +use inst::create_machine_env; use self::inst::EmitInfo; @@ -33,7 +31,7 @@ pub struct S390xBackend { triple: Triple, flags: shared_settings::Flags, isa_flags: s390x_settings::Flags, - reg_universe: RealRegUniverse, + machine_env: MachineEnv, } impl S390xBackend { @@ -43,12 +41,12 @@ impl S390xBackend { flags: shared_settings::Flags, isa_flags: s390x_settings::Flags, ) -> S390xBackend { - let reg_universe = create_reg_universe(&flags); + let machine_env = create_machine_env(&flags); S390xBackend { triple, flags, isa_flags, - reg_universe, + machine_env, } } @@ -58,10 +56,10 @@ impl S390xBackend { &self, func: &Function, flags: shared_settings::Flags, - ) -> CodegenResult> { + ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); let abi = Box::new(abi::S390xABICallee::new(func, flags, self.isa_flags())?); - compile::compile::(func, self, abi, &self.reg_universe, emit_info) + compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -72,28 +70,27 @@ impl TargetIsa for S390xBackend { want_disasm: bool, ) -> CodegenResult { let flags = self.flags(); - let vcode = self.compile_vcode(func, flags.clone())?; - let (buffer, bb_starts, bb_edges) = vcode.emit(); - let frame_size = vcode.frame_size(); - let value_labels_ranges = vcode.value_labels_ranges(); - let stackslot_offsets = vcode.stackslot_offsets().clone(); + let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; - let disasm = if want_disasm { - Some(vcode.show_rru(Some(&create_reg_universe(flags)))) - } else { - None - }; + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, flags.machine_code_cfg_info()); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer.finish(); + let stackslot_offsets = emit_result.stackslot_offsets; - let buffer = buffer.finish(); + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } Ok(MachCompileResult { buffer, frame_size, - disasm, + disasm: emit_result.disasm, value_labels_ranges, stackslot_offsets, - bb_starts, - bb_edges, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, }) } @@ -296,10 
+293,11 @@ mod test { // jg label3 // ahi %r2, -4660 // br %r14 + let golden = vec![ - 167, 42, 18, 52, 167, 46, 0, 0, 192, 100, 0, 0, 0, 11, 236, 50, 18, 52, 0, 216, 167, - 62, 0, 0, 192, 100, 255, 255, 255, 251, 167, 46, 0, 0, 192, 100, 255, 255, 255, 246, - 167, 42, 237, 204, 7, 254, + 236, 50, 18, 52, 0, 216, 167, 62, 0, 0, 192, 100, 0, 0, 0, 11, 236, 67, 18, 52, 0, 216, + 167, 78, 0, 0, 192, 100, 255, 255, 255, 251, 167, 62, 0, 0, 192, 100, 255, 255, 255, + 246, 236, 35, 237, 204, 0, 216, 7, 254, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/codegen/src/isa/unwind.rs b/cranelift/codegen/src/isa/unwind.rs index 4dd8ae78dd..bf4561e840 100644 --- a/cranelift/codegen/src/isa/unwind.rs +++ b/cranelift/codegen/src/isa/unwind.rs @@ -1,6 +1,6 @@ //! Represents information relating to function unwinding. -use regalloc::RealReg; +use crate::machinst::RealReg; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; diff --git a/cranelift/codegen/src/isa/unwind/systemv.rs b/cranelift/codegen/src/isa/unwind/systemv.rs index b914f13a75..e2c2a381a3 100644 --- a/cranelift/codegen/src/isa/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/unwind/systemv.rs @@ -1,8 +1,9 @@ //! System V ABI unwind information. -use crate::binemit::CodeOffset; use crate::isa::unwind::UnwindInst; -use crate::result::{CodegenError, CodegenResult}; +use crate::machinst::Reg; +use crate::result::CodegenResult; +use crate::{binemit::CodeOffset, CodegenError}; use alloc::vec::Vec; use gimli::write::{Address, FrameDescriptionEntry}; @@ -160,7 +161,7 @@ pub struct UnwindInfo { len: u32, } -pub(crate) fn create_unwind_info_from_insts>( +pub(crate) fn create_unwind_info_from_insts>( insts: &[(CodeOffset, UnwindInst)], code_len: usize, mr: &MR, @@ -237,7 +238,7 @@ pub(crate) fn create_unwind_info_from_insts>( reg, } => { let reg = mr - .map(reg.to_reg()) + .map(reg.into()) .map_err(|e| CodegenError::RegisterMappingError(e))?; let off = (clobber_offset as i32) - (clobber_offset_to_cfa as i32); instructions.push((instruction_offset, CallFrameInstruction::Offset(reg, off))); diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 68cb38b758..349b94cfe6 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -262,7 +262,7 @@ impl UnwindInfo { const UNWIND_RBP_REG: u8 = 5; -pub(crate) fn create_unwind_info_from_insts>( +pub(crate) fn create_unwind_info_from_insts>( insts: &[(CodeOffset, UnwindInst)], ) -> CodegenResult { let mut unwind_codes = vec![]; @@ -293,7 +293,7 @@ pub(crate) fn create_unwind_info_from_insts>( &UnwindInst::SaveReg { clobber_offset, reg, - } => match MR::map(reg.to_reg()) { + } => match MR::map(reg.into()) { MappedRegister::Int(reg) => { unwind_codes.push(UnwindCode::SaveReg { instruction_offset, diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 9687ac5f26..620dfec168 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use args::*; -use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; @@ -32,7 +32,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { // This is SpiderMonkey's `WasmTlsReg`. 
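
On the unwind changes just above: `RegisterMapper` is now parameterized over the machine-independent `Reg`, and the s390x `map_reg` shown earlier indexes per-class DWARF tables by hardware encoding. For GPRs that mapping is the identity; FPRs go through `FPR_MAP` because the s390x DWARF numbering of FPRs is interleaved rather than linear. A GPR-only sketch of the shape (a simplification, not the PR's code):

    use crate::machinst::{Reg, RegClass};
    use gimli::Register;

    // GPR i -> DWARF register i on s390x; virtual regs have no mapping.
    fn map_gpr_dwarf(reg: Reg) -> Option<Register> {
        let rreg = reg.to_real_reg()?;
        match rreg.class() {
            RegClass::Int => Some(Register(rreg.hw_enc() as u16)),
            RegClass::Float => None, // non-linear; use the FPR_MAP lookup
        }
    }
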
Some(ABIArg::reg( - regs::r14().to_real_reg(), + regs::r14().to_real_reg().unwrap(), types::I64, param.extension, param.purpose, @@ -41,7 +41,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { // This is SpiderMonkey's `WasmTableCallSigReg`. Some(ABIArg::reg( - regs::r10().to_real_reg(), + regs::r10().to_real_reg().unwrap(), types::I64, param.extension, param.purpose, @@ -204,7 +204,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { let mut slots = vec![]; for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { - let intreg = *rc == RegClass::I64; + let intreg = *rc == RegClass::Int; let nextreg = if intreg { match args_or_rets { ArgsOrRets::Args => { @@ -232,7 +232,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { next_vreg += 1; } slots.push(ABIArgSlot::Reg { - reg: reg.to_real_reg(), + reg: reg.to_real_reg().unwrap(), ty: *reg_ty, extension: param.extension, }); @@ -277,7 +277,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { debug_assert!(args_or_rets == ArgsOrRets::Args); if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) { ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), types::I64, ir::ArgumentExtension::None, ir::ArgumentPurpose::Normal, @@ -357,8 +357,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { } } - fn gen_ret() -> Self::I { - Inst::ret() + fn gen_ret(rets: Vec) -> Self::I { + Inst::ret(rets) } fn gen_epilogue_placeholder() -> Self::I { @@ -397,8 +397,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_stacklimit_reg() -> Reg { debug_assert!( - !is_callee_save_systemv(regs::r10().to_real_reg()) - && !is_callee_save_baldrdash(regs::r10().to_real_reg()) + !is_callee_save_systemv(regs::r10().to_real_reg().unwrap()) + && !is_callee_save_baldrdash(regs::r10().to_real_reg().unwrap()) ); // As per comment on trait definition, we must return a caller-save @@ -499,7 +499,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _call_conv: isa::CallConv, setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec>, + clobbered_callee_saves: &[Writable], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>) { @@ -536,25 +536,24 @@ impl ABIMachineSpec for X64ABIMachineSpec { for reg in clobbered_callee_saves { let r_reg = reg.to_reg(); let off = cur_offset; - match r_reg.get_class() { - RegClass::I64 => { + match r_reg.class() { + RegClass::Int => { insts.push(Inst::store( types::I64, - r_reg.to_reg(), + r_reg.into(), Amode::imm_reg(cur_offset, regs::rsp()), )); cur_offset += 8; } - RegClass::V128 => { + RegClass::Float => { cur_offset = align_to(cur_offset, 16); insts.push(Inst::store( types::I8X16, - r_reg.to_reg(), + r_reg.into(), Amode::imm_reg(cur_offset, regs::rsp()), )); cur_offset += 16; } - _ => unreachable!(), }; if flags.unwind_info() { insts.push(Inst::Unwind { @@ -572,7 +571,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn gen_clobber_restore( call_conv: isa::CallConv, flags: &settings::Flags, - clobbers: &Set>, + clobbers: &[Writable], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]> { @@ -587,25 +586,24 @@ impl ABIMachineSpec for X64ABIMachineSpec { let mut cur_offset = fixed_frame_storage_size; for reg in &clobbered_callee_saves { let rreg = reg.to_reg(); - match rreg.get_class() { - RegClass::I64 => { + match rreg.class() { + RegClass::Int => { insts.push(Inst::mov64_m_r( Amode::imm_reg(cur_offset, regs::rsp()), - Writable::from_reg(rreg.to_reg()), + Writable::from_reg(rreg.into()), )); cur_offset += 8; } 
- RegClass::V128 => { + RegClass::Float => { cur_offset = align_to(cur_offset, 16); insts.push(Inst::load( types::I8X16, Amode::imm_reg(cur_offset, regs::rsp()), - Writable::from_reg(rreg.to_reg()), + Writable::from_reg(rreg.into()), ExtKind::None, )); cur_offset += 16; } - _ => unreachable!(), } } // Adjust RSP back upward. @@ -641,34 +639,27 @@ impl ABIMachineSpec for X64ABIMachineSpec { tmp: Writable, _callee_conv: isa::CallConv, _caller_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> { + ) -> SmallVec<[Self::I; 2]> { let mut insts = SmallVec::new(); match dest { &CallDest::ExtName(ref name, RelocDistance::Near) => { - insts.push(( - InstIsSafepoint::Yes, - Inst::call_known(name.clone(), uses, defs, opcode), - )); + insts.push(Inst::call_known(name.clone(), uses, defs, opcode)); } &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(( - InstIsSafepoint::No, - Inst::LoadExtName { - dst: tmp, - name: Box::new(name.clone()), - offset: 0, - }, - )); - insts.push(( - InstIsSafepoint::Yes, - Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, opcode), + insts.push(Inst::LoadExtName { + dst: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::call_unknown( + RegMem::reg(tmp.to_reg()), + uses, + defs, + opcode, )); } &CallDest::Reg(reg) => { - insts.push(( - InstIsSafepoint::Yes, - Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode), - )); + insts.push(Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode)); } } insts @@ -722,9 +713,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. match rc { - RegClass::I64 => 1, - RegClass::V128 => 2, - _ => panic!("Unexpected register class!"), + RegClass::Int => 1, + RegClass::Float => 2, } } @@ -798,7 +788,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_clobbered_callee_saves( call_conv: CallConv, - regs: &Set>, + regs: &[Writable], ) -> Vec> { let mut regs: Vec> = match call_conv { CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs @@ -824,7 +814,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { }; // Sort registers for deterministic code output. We can do an unstable sort because the // registers will be unique (there are no dups). - regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); regs } @@ -981,21 +971,20 @@ fn get_fltreg_for_retval( fn is_callee_save_systemv(r: RealReg) -> bool { use regs::*; - match r.get_class() { - RegClass::I64 => match r.get_hw_encoding() as u8 { + match r.class() { + RegClass::Int => match r.hw_enc() { ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, _ => false, }, - RegClass::V128 => false, - _ => unimplemented!(), + RegClass::Float => false, } } fn is_callee_save_baldrdash(r: RealReg) -> bool { use regs::*; - match r.get_class() { - RegClass::I64 => { - if r.get_hw_encoding() as u8 == ENC_R14 { + match r.class() { + RegClass::Int => { + if r.hw_enc() == ENC_R14 { // r14 is the WasmTlsReg and is preserved implicitly. 
false } else { @@ -1003,38 +992,35 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool { is_callee_save_systemv(r) } } - RegClass::V128 => false, - _ => unimplemented!(), + RegClass::Float => false, } } fn is_callee_save_fastcall(r: RealReg) -> bool { use regs::*; - match r.get_class() { - RegClass::I64 => match r.get_hw_encoding() as u8 { + match r.class() { + RegClass::Int => match r.hw_enc() { ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, _ => false, }, - RegClass::V128 => match r.get_hw_encoding() as u8 { + RegClass::Float => match r.hw_enc() { 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true, _ => false, }, - _ => panic!("Unknown register class: {:?}", r.get_class()), } } -fn compute_clobber_size(clobbers: &Vec>) -> u32 { +fn compute_clobber_size(clobbers: &[Writable]) -> u32 { let mut clobbered_size = 0; for reg in clobbers { - match reg.to_reg().get_class() { - RegClass::I64 => { + match reg.to_reg().class() { + RegClass::Int => { clobbered_size += 8; } - RegClass::V128 => { + RegClass::Float => { clobbered_size = align_to(clobbered_size, 16); clobbered_size += 16; } - _ => unreachable!(), } } align_to(clobbered_size, 16) diff --git a/cranelift/codegen/src/isa/x64/encoding/evex.rs b/cranelift/codegen/src/isa/x64/encoding/evex.rs index 80a3c86bda..117a196f13 100644 --- a/cranelift/codegen/src/isa/x64/encoding/evex.rs +++ b/cranelift/codegen/src/isa/x64/encoding/evex.rs @@ -369,8 +369,8 @@ mod tests { .map(OpcodeMap::_0F38) .w(true) .opcode(0x1F) - .reg(dst.get_hw_encoding()) - .rm(src.get_hw_encoding()) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src.to_real_reg().unwrap().hw_enc()) .length(EvexVectorLength::V128) .encode(&mut sink0); @@ -393,8 +393,8 @@ mod tests { .map(OpcodeMap::None) .w(false) .opcode(0x00) - .reg(regs::rax().get_hw_encoding()) - .rm(regs::rax().get_hw_encoding()) + .reg(regs::rax().to_real_reg().unwrap().hw_enc()) + .rm(regs::rax().to_real_reg().unwrap().hw_enc()) .mask(EvexMasking::None) .encode(&mut sink1); diff --git a/cranelift/codegen/src/isa/x64/encoding/rex.rs b/cranelift/codegen/src/isa/x64/encoding/rex.rs index 509309205d..fba1007c95 100644 --- a/cranelift/codegen/src/isa/x64/encoding/rex.rs +++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs @@ -8,6 +8,7 @@ //! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following //! means "hardware register encoding number". +use crate::machinst::{Reg, RegClass}; use crate::{ ir::TrapCode, isa::x64::inst::{ @@ -16,7 +17,6 @@ use crate::{ }, machinst::MachBuffer, }; -use regalloc::{Reg, RegClass}; pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool { let xs = (x as i32) as i64; @@ -50,8 +50,8 @@ pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 { pub(crate) fn int_reg_enc(reg: impl Into) -> u8 { let reg = reg.into(); debug_assert!(reg.is_real()); - debug_assert_eq!(reg.get_class(), RegClass::I64); - reg.get_hw_encoding() + debug_assert_eq!(reg.class(), RegClass::Int); + reg.to_real_reg().unwrap().hw_enc() } /// Get the encoding number of any register. 
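
Since `compute_clobber_size` above encodes the x64 clobber-frame layout rule in a loop, here is the same rule as a tiny standalone function with a worked example (a sketch; only `RegClass` is taken from the PR):

    use crate::machinst::RegClass;

    // 8 bytes per clobbered GPR; 16 bytes per clobbered XMM, aligned to 16;
    // total rounded up to 16.
    fn clobber_area_size(clobber_classes: &[RegClass]) -> u32 {
        fn align_to(x: u32, a: u32) -> u32 { (x + a - 1) & !(a - 1) }
        let mut size = 0;
        for rc in clobber_classes {
            match rc {
                RegClass::Int => size += 8,
                RegClass::Float => {
                    size = align_to(size, 16);
                    size += 16;
                }
            }
        }
        align_to(size, 16)
    }

    // e.g. [Int, Float, Int]: 8, then align to 16 and +16 = 32, then +8 = 40,
    // rounded up to a 48-byte clobber area.
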
@@ -59,7 +59,7 @@ pub(crate) fn int_reg_enc(reg: impl Into) -> u8 { pub(crate) fn reg_enc(reg: impl Into) -> u8 { let reg = reg.into(); debug_assert!(reg.is_real()); - reg.get_hw_encoding() + reg.to_real_reg().unwrap().hw_enc() } /// A small bit field to record a REX prefix specification: diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index b4fdbde172..340443c7d8 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -41,7 +41,8 @@ (Div (size OperandSize) ;; 1, 2, 4, or 8 (signed bool) (divisor GprMem) - (dividend Gpr) + (dividend_lo Gpr) + (dividend_hi Gpr) (dst_quotient WritableGpr) (dst_remainder WritableGpr)) @@ -69,7 +70,8 @@ ;; regalloc failures where %rdx is live before its first def! (CheckedDivOrRemSeq (kind DivOrRemKind) (size OperandSize) - (dividend Gpr) + (dividend_lo Gpr) + (dividend_hi Gpr) ;; The divisor operand. Note it's marked as modified ;; so that it gets assigned a register different from ;; the temporary. @@ -318,7 +320,7 @@ (opcode Opcode)) ;; Return. - (Ret) + (Ret (rets VecReg)) ;; A placeholder instruction, generating no code, meaning that a function ;; epilogue must be inserted there. @@ -476,13 +478,12 @@ ;; `rax`. (MachOTlsGetAddr (symbol ExternalName)) - ;; A definition of a value label. - (ValueLabelMarker (reg Reg) - (label ValueLabel)) - ;; An unwind pseudoinstruction describing the state of the machine at ;; this program point. - (Unwind (inst UnwindInst)))) + (Unwind (inst UnwindInst)) + + ;; A pseudoinstruction that just keeps a value alive. + (DummyUse (reg Reg)))) (type OperandSize extern (enum Size8 diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 6ad2e0111e..cae4957b4c 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1,14 +1,13 @@ //! Instruction operand sub-components (aka "parts"): definitions and printing. -use super::regs::{self, show_ireg_sized}; +use super::regs::{self}; use super::EmitState; use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::{MemFlags, Type}; +use crate::isa::x64::inst::regs::pretty_print_reg; use crate::isa::x64::inst::Inst; use crate::machinst::*; -use regalloc::{ - PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, Writable, -}; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::String; @@ -23,25 +22,6 @@ pub trait FromWritableReg: Sized { fn from_writable_reg(w: Writable) -> Option; } -/// An extension trait for mapping register uses on `{Xmm,Gpr}`. -pub trait MapUseExt { - fn map_use(&mut self, mapper: &RM) - where - RM: RegMapper; -} - -/// An extension trait for mapping register mods and defs on -/// `Writable{Xmm,Gpr}`. -pub trait MapDefModExt { - fn map_def(&mut self, mapper: &RM) - where - RM: RegMapper; - - fn map_mod(&mut self, mapper: &RM) - where - RM: RegMapper; -} - /// A macro for defining a newtype of `Reg` that enforces some invariant about /// the wrapped `Reg` (such as that it is of a particular register class). macro_rules! newtype_of_reg { @@ -55,7 +35,7 @@ macro_rules! newtype_of_reg { |$check_reg:ident| $check:expr ) => { /// A newtype wrapper around `Reg`. - #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct $newtype_reg(Reg); impl PartialEq for $newtype_reg { @@ -70,12 +50,6 @@ macro_rules! 
newtype_of_reg { } } - impl PrettyPrint for $newtype_reg { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.0.show_rru(mb_rru) - } - } - impl $newtype_reg { /// Create this newtype from the given register, or return `None` if the register /// is not a valid instance of this newtype. @@ -107,21 +81,6 @@ macro_rules! newtype_of_reg { } } - impl MapUseExt for $newtype_reg { - fn map_use(&mut self, mapper: &RM) - where - RM: RegMapper, - { - let mut reg = self.0; - mapper.map_use(&mut reg); - debug_assert!({ - let $check_reg = reg; - $check - }); - *self = $newtype_reg(reg); - } - } - pub type $newtype_writable_reg = Writable<$newtype_reg>; #[allow(dead_code)] // Used by some newtypes and not others. @@ -139,34 +98,6 @@ macro_rules! newtype_of_reg { } } - impl MapDefModExt for $newtype_writable_reg { - fn map_def(&mut self, mapper: &RM) - where - RM: RegMapper, - { - let mut reg = self.to_writable_reg(); - mapper.map_def(&mut reg); - debug_assert!({ - let $check_reg = reg.to_reg(); - $check - }); - *self = Writable::from_reg($newtype_reg(reg.to_reg())); - } - - fn map_mod(&mut self, mapper: &RM) - where - RM: RegMapper, - { - let mut reg = self.to_writable_reg(); - mapper.map_mod(&mut reg); - debug_assert!({ - let $check_reg = reg.to_reg(); - $check - }); - *self = Writable::from_reg($newtype_reg(reg.to_reg())); - } - } - /// A newtype wrapper around `RegMem` for general-purpose registers. #[derive(Clone, Debug)] pub struct $newtype_reg_mem(RegMem); @@ -201,44 +132,16 @@ macro_rules! newtype_of_reg { } #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_uses(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_uses(mapper); - debug_assert!(match self.0 { - RegMem::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_as_def(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_as_def(mapper); - debug_assert!(match self.0 { - RegMem::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. - pub fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { - self.0.get_regs_as_uses(collector); + pub fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { + self.0.get_operands(collector); } } - impl PrettyPrint for $newtype_reg_mem { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.0.show_rru(mb_rru) - } - } - - impl PrettyPrintSized for $newtype_reg_mem { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { - self.0.show_rru_sized(mb_rru, size) + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { + self.0.pretty_print(size, allocs) } } @@ -278,44 +181,17 @@ macro_rules! newtype_of_reg { } #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_uses(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_uses(mapper); - debug_assert!(match self.0 { - RegMemImm::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_as_def(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_as_def(mapper); - debug_assert!(match self.0 { - RegMemImm::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. 
- pub fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { - self.0.get_regs_as_uses(collector); + pub fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { + self.0.get_operands(collector); } } impl PrettyPrint for $newtype_reg_mem_imm { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.0.show_rru(mb_rru) - } - } - - impl PrettyPrintSized for $newtype_reg_mem_imm { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { - self.0.show_rru_sized(mb_rru, size) + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { + self.0.pretty_print(size, allocs) } } @@ -359,7 +235,7 @@ newtype_of_reg!( GprMem, GprMemImm, Imm8Gpr, - |reg| reg.get_class() == RegClass::I64 + |reg| reg.class() == RegClass::Int ); // Define a newtype of `Reg` for XMM registers. @@ -370,7 +246,7 @@ newtype_of_reg!( XmmMem, XmmMemImm, Imm8Xmm, - |reg| reg.get_class() == RegClass::V128 + |reg| reg.class() == RegClass::Float ); /// A possible addressing mode (amode) that can be used in instructions. @@ -400,7 +276,7 @@ pub enum Amode { impl Amode { pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self { - debug_assert!(base.get_class() == RegClass::I64); + debug_assert!(base.class() == RegClass::Int); Self::ImmReg { simm32, base, @@ -409,8 +285,8 @@ impl Amode { } pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Gpr, index: Gpr, shift: u8) -> Self { - debug_assert!(base.get_class() == RegClass::I64); - debug_assert!(index.get_class() == RegClass::I64); + debug_assert!(base.class() == RegClass::Int); + debug_assert!(index.class() == RegClass::Int); debug_assert!(shift <= 3); Self::ImmRegRegShift { simm32, @@ -450,14 +326,17 @@ impl Amode { } /// Add the regs mentioned by `self` to `collector`. - pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { Amode::ImmReg { base, .. } => { - collector.add_use(*base); + collector.reg_use(*base); } Amode::ImmRegRegShift { base, index, .. } => { - collector.add_use(base.to_reg()); - collector.add_use(index.to_reg()); + collector.reg_use(base.to_reg()); + collector.reg_use(index.to_reg()); } Amode::RipRelative { .. } => { // RIP isn't involved in regalloc. @@ -476,13 +355,56 @@ impl Amode { pub(crate) fn can_trap(&self) -> bool { !self.get_flags().notrap() } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + // The order in which we consume allocs here must match the + // order in which we produce operands in get_operands() above. + match self { + &Amode::ImmReg { + simm32, + base, + flags, + } => Amode::ImmReg { + simm32, + flags, + base: allocs.next(base), + }, + &Amode::ImmRegRegShift { + simm32, + base, + index, + shift, + flags, + } => Amode::ImmRegRegShift { + simm32, + shift, + flags, + base: Gpr::new(allocs.next(*base)).unwrap(), + index: Gpr::new(allocs.next(*index)).unwrap(), + }, + &Amode::RipRelative { target } => Amode::RipRelative { target }, + } + } + + /// Offset the amode by a fixed offset. + pub(crate) fn offset(&self, offset: u32) -> Self { + let mut ret = self.clone(); + match &mut ret { + &mut Amode::ImmReg { ref mut simm32, .. } => *simm32 += offset, + &mut Amode::ImmRegRegShift { ref mut simm32, .. 
} => *simm32 += offset, + _ => panic!("Cannot offset amode: {:?}", self), + } + ret + } } impl PrettyPrint for Amode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { Amode::ImmReg { simm32, base, .. } => { - format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)) + // Note: size is always 8; the address is 64 bits, + // even if the addressed operand is smaller. + format!("{}({})", *simm32 as i32, pretty_print_reg(*base, 8, allocs)) } Amode::ImmRegRegShift { simm32, @@ -493,8 +415,8 @@ impl PrettyPrint for Amode { } => format!( "{}({},{},{})", *simm32 as i32, - base.show_rru(mb_rru), - index.show_rru(mb_rru), + pretty_print_reg(base.to_reg(), 8, allocs), + pretty_print_reg(index.to_reg(), 8, allocs), 1 << shift ), Amode::RipRelative { ref target } => format!("label{}(%rip)", target.get()), @@ -524,9 +446,12 @@ impl SyntheticAmode { } /// Add the regs mentioned by `self` to `collector`. - pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { - SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector), + SyntheticAmode::Real(addr) => addr.get_operands(collector), SyntheticAmode::NominalSPOffset { .. } => { // Nothing to do; the base is SP and isn't involved in regalloc. } @@ -534,16 +459,6 @@ impl SyntheticAmode { } } - pub(crate) fn map_uses(&mut self, map: &RM) { - match self { - SyntheticAmode::Real(addr) => addr.map_uses(map), - SyntheticAmode::NominalSPOffset { .. } => { - // Nothing to do. - } - SyntheticAmode::ConstantOffset(_) => {} - } - } - pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &MachBuffer) -> Amode { match self { SyntheticAmode::Real(addr) => addr.clone(), @@ -561,6 +476,15 @@ impl SyntheticAmode { } } } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + SyntheticAmode::Real(addr) => SyntheticAmode::Real(addr.with_allocs(allocs)), + &SyntheticAmode::NominalSPOffset { .. } | &SyntheticAmode::ConstantOffset { .. } => { + self.clone() + } + } + } } impl Into for Amode { @@ -570,9 +494,10 @@ impl Into for Amode { } impl PrettyPrint for SyntheticAmode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { - SyntheticAmode::Real(addr) => addr.show_rru(mb_rru), + // See note in `Amode` regarding constant size of `8`. + SyntheticAmode::Real(addr) => addr.pretty_print(8, allocs), SyntheticAmode::NominalSPOffset { simm32 } => { format!("rsp({} + virtual offset)", *simm32 as i32) } @@ -594,7 +519,7 @@ pub enum RegMemImm { impl RegMemImm { pub(crate) fn reg(reg: Reg) -> Self { - debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); + debug_assert!(reg.class() == RegClass::Int || reg.class() == RegClass::Float); Self::Reg { reg } } pub(crate) fn mem(addr: impl Into) -> Self { @@ -607,15 +532,18 @@ impl RegMemImm { /// Asserts that in register mode, the reg class is the one that's expected. pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) { if let Self::Reg { reg } = self { - debug_assert_eq!(reg.get_class(), expected_reg_class); + debug_assert_eq!(reg.class(), expected_reg_class); } } /// Add the regs mentioned by `self` to `collector`. 
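
The comment in `with_allocs` above states the load-bearing invariant of this whole PR: regalloc2 hands allocations back as a flat per-instruction list, so the order in which `with_allocs` (or `emit`) consumes them must exactly match the order in which `get_operands` produced them. A toy two-register operand, using this PR's `OperandCollector` and `AllocationConsumer` types, makes the pairing concrete:

    use crate::machinst::{AllocationConsumer, OperandCollector, Reg};
    use regalloc2::VReg;

    struct ToyAmode {
        base: Reg,
        index: Reg,
    }

    impl ToyAmode {
        fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) {
            collector.reg_use(self.base); // produces operand 0
            collector.reg_use(self.index); // produces operand 1
        }

        fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
            ToyAmode {
                base: allocs.next(self.base),   // consumes operand 0
                index: allocs.next(self.index), // consumes operand 1
            }
        }
    }
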
- pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { - Self::Reg { reg } => collector.add_use(*reg), - Self::Mem { addr } => addr.get_regs_as_uses(collector), + Self::Reg { reg } => collector.reg_use(*reg), + Self::Mem { addr } => addr.get_operands(collector), Self::Imm { .. } => {} } } @@ -626,19 +554,25 @@ impl RegMemImm { _ => None, } } -} -impl PrettyPrint for RegMemImm { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.show_rru_sized(mb_rru, 8) + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + Self::Reg { reg } => Self::Reg { + reg: allocs.next(*reg), + }, + Self::Mem { addr } => Self::Mem { + addr: addr.with_allocs(allocs), + }, + Self::Imm { .. } => self.clone(), + } } } -impl PrettyPrintSized for RegMemImm { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { +impl PrettyPrint for RegMemImm { + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { - Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size), - Self::Mem { addr } => addr.show_rru(mb_rru), + Self::Reg { reg } => pretty_print_reg(*reg, size, allocs), + Self::Mem { addr } => addr.pretty_print(size, allocs), Self::Imm { simm32 } => format!("${}", *simm32 as i32), } } @@ -673,7 +607,7 @@ pub enum RegMem { impl RegMem { pub(crate) fn reg(reg: Reg) -> Self { - debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); + debug_assert!(reg.class() == RegClass::Int || reg.class() == RegClass::Float); Self::Reg { reg } } pub(crate) fn mem(addr: impl Into) -> Self { @@ -682,14 +616,17 @@ impl RegMem { /// Asserts that in register mode, the reg class is the one that's expected. pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) { if let Self::Reg { reg } = self { - debug_assert_eq!(reg.get_class(), expected_reg_class); + debug_assert_eq!(reg.class(), expected_reg_class); } } /// Add the regs mentioned by `self` to `collector`. - pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { - RegMem::Reg { reg } => collector.add_use(*reg), - RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector), + RegMem::Reg { reg } => collector.reg_use(*reg), + RegMem::Mem { addr, .. } => addr.get_operands(collector), } } pub(crate) fn to_reg(&self) -> Option { @@ -698,6 +635,17 @@ impl RegMem { _ => None, } } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + RegMem::Reg { reg } => RegMem::Reg { + reg: allocs.next(*reg), + }, + RegMem::Mem { addr } => RegMem::Mem { + addr: addr.with_allocs(allocs), + }, + } + } } impl From> for RegMem { @@ -707,16 +655,10 @@ impl From> for RegMem { } impl PrettyPrint for RegMem { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.show_rru_sized(mb_rru, 8) - } -} - -impl PrettyPrintSized for RegMem { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { - RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size), - RegMem::Mem { addr, .. } => addr.show_rru(mb_rru), + RegMem::Reg { reg } => pretty_print_reg(*reg, size, allocs), + RegMem::Mem { addr, .. 
} => addr.pretty_print(size, allocs), } } } @@ -1222,6 +1164,22 @@ impl SseOpcode { _ => 8, } } + + /// Does an XmmRmmRImm with this opcode use src1? FIXME: split + /// into separate instructions. + pub(crate) fn uses_src1(&self) -> bool { + match self { + SseOpcode::Pextrb => false, + SseOpcode::Pextrw => false, + SseOpcode::Pextrd => false, + SseOpcode::Pshufd => false, + SseOpcode::Roundss => false, + SseOpcode::Roundsd => false, + SseOpcode::Roundps => false, + SseOpcode::Roundpd => false, + _ => true, + } + } } impl fmt::Debug for SseOpcode { diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2ff056625c..0f98010978 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -10,15 +10,15 @@ use crate::isa::x64::encoding::rex::{ }; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; -use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel}; +use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel, Reg, Writable}; use core::convert::TryInto; -use regalloc::{Reg, Writable}; /// A small helper to generate a signed conversion instruction. fn emit_signed_cvt( sink: &mut MachBuffer, info: &EmitInfo, state: &mut EmitState, + // Required to be RealRegs. src: Reg, dst: Writable, to_f64: bool, @@ -31,7 +31,7 @@ fn emit_signed_cvt( SseOpcode::Cvtsi2ss }; let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } /// Emits a one way conditional jump if CC is set (true). @@ -110,6 +110,7 @@ fn emit_reloc( /// care?) pub(crate) fn emit( inst: &Inst, + allocs: &mut AllocationConsumer<'_>, sink: &mut MachBuffer, info: &EmitInfo, state: &mut EmitState, @@ -151,22 +152,24 @@ pub(crate) fn emit( src2, dst: reg_g, } => { - debug_assert_eq!(*src1, reg_g.to_reg()); + let (reg_g, src2) = if inst.produces_const() { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + (reg_g, RegMemImm::reg(reg_g)) + } else { + let src1 = allocs.next(src1.to_reg()); + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + debug_assert_eq!(src1, reg_g); + let src2 = src2.clone().to_reg_mem_imm().with_allocs(allocs); + (reg_g, src2) + }; + let mut rex = RexFlags::from(*size); if *op == AluRmiROpcode::Mul { // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so // we have to special-case it. - match src2.clone().to_reg_mem_imm() { + match src2 { RegMemImm::Reg { reg: reg_e } => { - emit_std_reg_reg( - sink, - LegacyPrefixes::None, - 0x0FAF, - 2, - reg_g.to_reg().to_reg(), - reg_e, - rex, - ); + emit_std_reg_reg(sink, LegacyPrefixes::None, 0x0FAF, 2, reg_g, reg_e, rex); } RegMemImm::Mem { addr } => { @@ -178,7 +181,7 @@ pub(crate) fn emit( LegacyPrefixes::None, 0x0FAF, 2, - reg_g.to_reg().to_reg(), + reg_g, &amode, rex, ); @@ -188,15 +191,7 @@ pub(crate) fn emit( let use_imm8 = low8_will_sign_extend_to_32(simm32); let opcode = if use_imm8 { 0x6B } else { 0x69 }; // Yes, really, reg_g twice. 
- emit_std_reg_reg( - sink, - LegacyPrefixes::None, - opcode, - 1, - reg_g.to_reg().to_reg(), - reg_g.to_reg().to_reg(), - rex, - ); + emit_std_reg_reg(sink, LegacyPrefixes::None, opcode, 1, reg_g, reg_g, rex); emit_simm(sink, if use_imm8 { 1 } else { 4 }, simm32); } } @@ -215,11 +210,11 @@ pub(crate) fn emit( }; assert!(!(is_8bit && *size == OperandSize::Size64)); - match src2.clone().to_reg_mem_imm() { + match src2 { RegMemImm::Reg { reg: reg_e } => { if is_8bit { rex.always_emit_if_8bit_needed(reg_e); - rex.always_emit_if_8bit_needed(reg_g.to_reg().to_reg()); + rex.always_emit_if_8bit_needed(reg_g); } // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R // duality). Do this too, so as to be able to compare generated machine @@ -230,17 +225,17 @@ pub(crate) fn emit( opcode_r, 1, reg_e, - reg_g.to_reg().to_reg(), + reg_g, rex, ); } RegMemImm::Mem { addr } => { + let amode = addr.finalize(state, sink); if is_8bit { - rex.always_emit_if_8bit_needed(reg_g.to_reg().to_reg()); + rex.always_emit_if_8bit_needed(reg_g); } // Here we revert to the "normal" G-E ordering. - let amode = addr.finalize(state, sink); emit_std_reg_mem( sink, state, @@ -248,7 +243,7 @@ pub(crate) fn emit( LegacyPrefixes::None, opcode_m, 1, - reg_g.to_reg().to_reg(), + reg_g, &amode, rex, ); @@ -259,7 +254,7 @@ pub(crate) fn emit( let use_imm8 = low8_will_sign_extend_to_32(simm32); let opcode = if use_imm8 { 0x83 } else { 0x81 }; // And also here we use the "normal" G-E ordering. - let enc_g = int_reg_enc(reg_g.to_reg().to_reg()); + let enc_g = int_reg_enc(reg_g); emit_std_enc_enc( sink, LegacyPrefixes::None, @@ -276,6 +271,7 @@ pub(crate) fn emit( } Inst::UnaryRmR { size, op, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let rex_flags = RexFlags::from(*size); use UnaryRmROpcode::*; let prefix = match size { @@ -299,17 +295,12 @@ pub(crate) fn emit( }; match src.clone().into() { - RegMem::Reg { reg: src } => emit_std_reg_reg( - sink, - prefix, - opcode, - num_opcodes, - dst.to_reg().to_reg(), - src, - rex_flags, - ), + RegMem::Reg { reg: src } => { + let src = allocs.next(src); + emit_std_reg_reg(sink, prefix, opcode, num_opcodes, dst, src, rex_flags); + } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, state, @@ -317,7 +308,7 @@ pub(crate) fn emit( prefix, opcode, num_opcodes, - dst.to_reg().to_reg(), + dst, &amode, rex_flags, ); @@ -326,8 +317,10 @@ pub(crate) fn emit( } Inst::Not { size, src, dst } => { - debug_assert_eq!(*src, dst.to_reg()); - let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg())); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, dst); + let rex_flags = RexFlags::from((*size, dst)); let (opcode, prefix) = match size { OperandSize::Size8 => (0xF6, LegacyPrefixes::None), OperandSize::Size16 => (0xF7, LegacyPrefixes::_66), @@ -336,13 +329,15 @@ pub(crate) fn emit( }; let subopcode = 2; - let enc_src = int_reg_enc(dst.to_reg()); + let enc_src = int_reg_enc(dst); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags) } Inst::Neg { size, src, dst } => { - debug_assert_eq!(*src, dst.to_reg()); - let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg())); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, dst); + let rex_flags = RexFlags::from((*size, dst)); let (opcode, prefix) = match size { 
OperandSize::Size8 => (0xF6, LegacyPrefixes::None), OperandSize::Size16 => (0xF7, LegacyPrefixes::_66), @@ -351,21 +346,28 @@ pub(crate) fn emit( }; let subopcode = 3; - let enc_src = int_reg_enc(dst.to_reg()); + let enc_src = int_reg_enc(dst); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags) } Inst::Div { size, signed, - dividend, + dividend_lo, + dividend_hi, divisor, dst_quotient, dst_remainder, } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); + let dividend_lo = allocs.next(dividend_lo.to_reg()); + let dividend_hi = allocs.next(dividend_hi.to_reg()); + let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + debug_assert_eq!(dividend_lo, regs::rax()); + debug_assert_eq!(dividend_hi, regs::rdx()); + debug_assert_eq!(dst_quotient, regs::rax()); + debug_assert_eq!(dst_remainder, regs::rdx()); + let (opcode, prefix) = match size { OperandSize::Size8 => (0xF6, LegacyPrefixes::None), OperandSize::Size16 => (0xF7, LegacyPrefixes::_66), @@ -379,6 +381,7 @@ pub(crate) fn emit( let subopcode = if *signed { 7 } else { 6 }; match divisor.clone().to_reg_mem() { RegMem::Reg { reg } => { + let reg = allocs.next(reg); let src = int_reg_enc(reg); emit_std_enc_enc( sink, @@ -391,7 +394,7 @@ pub(crate) fn emit( ) } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state, sink).with_allocs(allocs); emit_std_enc_mem( sink, state, @@ -415,9 +418,12 @@ pub(crate) fn emit( dst_lo, dst_hi, } => { - debug_assert_eq!(*src1, regs::rax()); - debug_assert_eq!(dst_lo.to_reg(), regs::rax()); - debug_assert_eq!(dst_hi.to_reg(), regs::rdx()); + let src1 = allocs.next(src1.to_reg()); + let dst_lo = allocs.next(dst_lo.to_reg().to_reg()); + let dst_hi = allocs.next(dst_hi.to_reg().to_reg()); + debug_assert_eq!(src1, regs::rax()); + debug_assert_eq!(dst_lo, regs::rax()); + debug_assert_eq!(dst_hi, regs::rdx()); let rex_flags = RexFlags::from(*size); let prefix = match size { @@ -430,11 +436,12 @@ pub(crate) fn emit( let subopcode = if *signed { 5 } else { 4 }; match src2.clone().to_reg_mem() { RegMem::Reg { reg } => { + let reg = allocs.next(reg); let src = int_reg_enc(reg); emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags) } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state, sink).with_allocs(allocs); emit_std_enc_mem( sink, state, info, prefix, 0xF7, 1, subopcode, &amode, rex_flags, ); @@ -443,8 +450,10 @@ pub(crate) fn emit( } Inst::SignExtendData { size, src, dst } => { - debug_assert_eq!(*src, regs::rax()); - debug_assert_eq!(dst.to_reg(), regs::rdx()); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, regs::rax()); + debug_assert_eq!(dst, regs::rdx()); match size { OperandSize::Size8 => { sink.put1(0x66); @@ -465,15 +474,23 @@ pub(crate) fn emit( Inst::CheckedDivOrRemSeq { kind, size, - dividend, + dividend_lo, + dividend_hi, divisor, tmp, dst_quotient, dst_remainder, } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); + let dividend_lo = allocs.next(dividend_lo.to_reg()); + let dividend_hi = allocs.next(dividend_hi.to_reg()); + let divisor = allocs.next(divisor.to_reg().to_reg()); + let dst_quotient = 
allocs.next(dst_quotient.to_reg().to_reg()); + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + let tmp = tmp.map(|tmp| allocs.next(tmp.to_reg().to_reg())); + debug_assert_eq!(dividend_lo, regs::rax()); + debug_assert_eq!(dividend_hi, regs::rdx()); + debug_assert_eq!(dst_quotient, regs::rax()); + debug_assert_eq!(dst_remainder, regs::rdx()); // Generates the following code sequence: // @@ -504,18 +521,16 @@ pub(crate) fn emit( // $done: // Check if the divisor is zero, first. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor); + inst.emit(&[], sink, info, state); let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let (do_op, done_label) = if kind.is_signed() { // Now check if the divisor is -1. - let inst = - Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor.to_reg().to_reg()); - inst.emit(sink, info, state); - + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor); + inst.emit(&[], sink, info, state); let do_op = sink.get_label(); // If not equal, jump to do-op. @@ -527,10 +542,10 @@ pub(crate) fn emit( let done_label = sink.get_label(); let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx())); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done_label); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); (Some(do_op), Some(done_label)) } else { @@ -541,24 +556,21 @@ pub(crate) fn emit( let inst = Inst::imm( OperandSize::Size64, 0x8000000000000000, - tmp.to_writable_reg(), + Writable::from_reg(tmp), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::reg(tmp.to_reg().to_reg()), - regs::rax(), - ); - inst.emit(sink, info, state); + let inst = + Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(tmp), regs::rax()); + inst.emit(&[], sink, info, state); } else { let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } // If not equal, jump over the trap. let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); (Some(do_op), None) } @@ -574,19 +586,15 @@ pub(crate) fn emit( if kind.is_signed() { // sign-extend the sign-bit of rax into rdx, for signed opcodes. let inst = Inst::sign_extend_data(*size); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { // zero for unsigned opcodes. let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx())); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } - let inst = Inst::div( - *size, - kind.is_signed(), - RegMem::reg(divisor.to_reg().to_reg()), - ); - inst.emit(sink, info, state); + let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor)); + inst.emit(&[], sink, info, state); // Lowering takes care of moving the result back into the right register, see comment // there. @@ -601,7 +609,8 @@ pub(crate) fn emit( simm64, dst, } => { - let enc_dst = int_reg_enc(dst.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let enc_dst = int_reg_enc(dst); if *dst_size == OperandSize::Size64 { if low32_will_sign_extend_to_64(*simm64) { // Sign-extended move imm32. 
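
A note on the pattern running through the hunks above: with regalloc2, `emit` no longer reads physical registers out of the instruction fields. Each operand is resolved through `allocs.next(...)`, and the calls must occur in exactly the order the operands were reported to the allocator — which is why the `Div` and `CheckedDivOrRemSeq` arms pull `dividend_lo`, `dividend_hi`, and the destination registers up front before matching on the divisor. Synthesized sub-instructions are emitted with an empty slice (`inst.emit(&[], sink, info, state)`) because their operands are already physical. A minimal sketch of that consumer, with hypothetical `Reg`/`Allocs` types standing in for the crate's real ones:

    // Hypothetical stand-ins for illustration only; not the crate's actual types.
    #[derive(Copy, Clone, Debug, PartialEq)]
    struct Reg(u8);

    struct Allocs<'a> {
        allocs: &'a [Reg], // physical registers chosen by regalloc2, in operand order
        pos: usize,
    }

    impl<'a> Allocs<'a> {
        fn new(allocs: &'a [Reg]) -> Self {
            Allocs { allocs, pos: 0 }
        }

        // Resolve the next operand. An empty slice means "operands are already
        // physical" -- the case for the `inst.emit(&[], ...)` sub-emissions above --
        // so the pre-regalloc register is returned unchanged.
        fn next(&mut self, pre_regalloc: Reg) -> Reg {
            if self.allocs.is_empty() {
                pre_regalloc
            } else {
                let r = self.allocs[self.pos];
                self.pos += 1;
                r
            }
        }
    }

    fn main() {
        let rax = Reg(0);
        let v0 = Reg(128); // a virtual register before allocation
        let mut a = Allocs::new(&[rax]);
        assert_eq!(a.next(v0), rax); // consumed in declaration order
        let mut sub = Allocs::new(&[]);
        assert_eq!(sub.next(rax), rax); // empty slice: identity mapping
    }
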
@@ -630,18 +639,21 @@ pub(crate) fn emit( } Inst::MovRR { size, src, dst } => { + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); emit_std_reg_reg( sink, LegacyPrefixes::None, 0x89, 1, - src.to_reg(), - dst.to_reg().to_reg(), + src, + dst, RexFlags::from(*size), ); } Inst::MovzxRmR { ext_mode, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let (opcodes, num_opcodes, mut rex_flags) = match ext_mode { ExtMode::BL => { // MOVZBL is (REX.W==0) 0F B6 /r @@ -675,6 +687,7 @@ pub(crate) fn emit( match src.clone().to_reg_mem() { RegMem::Reg { reg: src } => { + let src = allocs.next(src); match ext_mode { ExtMode::BL | ExtMode::BQ => { // A redundant REX prefix must be emitted for certain register inputs. @@ -687,14 +700,14 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) } RegMem::Mem { addr: src } => { - let src = &src.finalize(state, sink); + let src = &src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -703,7 +716,7 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) @@ -712,7 +725,8 @@ pub(crate) fn emit( } Inst::Mov64MR { src, dst } => { - let src = &src.finalize(state, sink); + let dst = allocs.next(dst.to_reg().to_reg()); + let src = &src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -721,14 +735,15 @@ pub(crate) fn emit( LegacyPrefixes::None, 0x8B, 1, - dst.to_reg().to_reg(), + dst, src, RexFlags::set_w(), ) } Inst::LoadEffectiveAddress { addr, dst } => { - let amode = addr.finalize(state, sink); + let dst = allocs.next(dst.to_reg().to_reg()); + let amode = addr.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -737,13 +752,14 @@ pub(crate) fn emit( LegacyPrefixes::None, 0x8D, 1, - dst.to_reg().to_reg(), + dst, &amode, RexFlags::set_w(), ); } Inst::MovsxRmR { ext_mode, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let (opcodes, num_opcodes, mut rex_flags) = match ext_mode { ExtMode::BL => { // MOVSBL is (REX.W==0) 0F BE /r @@ -769,6 +785,7 @@ pub(crate) fn emit( match src.clone().to_reg_mem() { RegMem::Reg { reg: src } => { + let src = allocs.next(src); match ext_mode { ExtMode::BL | ExtMode::BQ => { // A redundant REX prefix must be emitted for certain register inputs. @@ -781,14 +798,14 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) } RegMem::Mem { addr: src } => { - let src = &src.finalize(state, sink); + let src = &src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -797,7 +814,7 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) @@ -806,7 +823,8 @@ pub(crate) fn emit( } Inst::MovRM { size, src, dst } => { - let dst = &dst.finalize(state, sink); + let src = allocs.next(src.to_reg()); + let dst = &dst.finalize(state, sink).with_allocs(allocs); let prefix = match size { OperandSize::Size16 => LegacyPrefixes::_66, @@ -821,13 +839,13 @@ pub(crate) fn emit( // This is one of the few places where the presence of a // redundant REX prefix changes the meaning of the // instruction. 
- let rex = RexFlags::from((*size, src.to_reg())); + let rex = RexFlags::from((*size, src)); // 8-bit: MOV r8, r/m8 is (REX.W==0) 88 /r // 16-bit: MOV r16, r/m16 is 66 (REX.W==0) 89 /r // 32-bit: MOV r32, r/m32 is (REX.W==0) 89 /r // 64-bit: MOV r64, r/m64 is (REX.W==1) 89 /r - emit_std_reg_mem(sink, state, info, prefix, opcode, 1, src.to_reg(), dst, rex); + emit_std_reg_mem(sink, state, info, prefix, opcode, 1, src, dst, rex); } Inst::ShiftR { @@ -837,7 +855,9 @@ pub(crate) fn emit( num_bits, dst, } => { - debug_assert_eq!(*src, dst.to_reg()); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, dst); let subopcode = match kind { ShiftKind::RotateLeft => 0, ShiftKind::RotateRight => 1, @@ -845,10 +865,11 @@ pub(crate) fn emit( ShiftKind::ShiftRightLogical => 5, ShiftKind::ShiftRightArithmetic => 7, }; - let enc_dst = int_reg_enc(dst.to_reg()); - let rex_flags = RexFlags::from((*size, dst.to_reg().to_reg())); + let enc_dst = int_reg_enc(dst); + let rex_flags = RexFlags::from((*size, dst)); match num_bits.clone().to_imm8_reg() { Imm8Reg::Reg { reg } => { + let reg = allocs.next(reg); debug_assert_eq!(reg, regs::rcx()); let (opcode, prefix) = match size { OperandSize::Size8 => (0xD2, LegacyPrefixes::None), @@ -890,7 +911,9 @@ pub(crate) fn emit( src2, dst, } => { - debug_assert_eq!(*src1, dst.to_reg()); + let src1 = allocs.next(src1.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src1, dst); let rex = RexFlags::clear_w(); let prefix = LegacyPrefixes::_66; let src2 = src2.clone().to_reg_mem_imm(); @@ -906,7 +929,7 @@ pub(crate) fn emit( SseOpcode::Psrlq => (0x0F73, 2), _ => panic!("invalid opcode: {}", opcode), }; - let dst_enc = reg_enc(dst.to_reg()); + let dst_enc = reg_enc(dst); emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex); let imm = (simm32) .try_into() @@ -927,18 +950,11 @@ pub(crate) fn emit( match src2 { RegMemImm::Reg { reg } => { - emit_std_reg_reg( - sink, - prefix, - opcode_bytes, - 2, - dst.to_reg().to_reg(), - reg, - rex, - ); + let reg = allocs.next(reg); + emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst, reg, rex); } RegMemImm::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, state, @@ -946,7 +962,7 @@ pub(crate) fn emit( prefix, opcode_bytes, 2, - dst.to_reg().to_reg(), + dst, addr, rex, ); @@ -962,6 +978,8 @@ pub(crate) fn emit( dst: reg_g, opcode, } => { + let reg_g = allocs.next(reg_g.to_reg()); + let is_cmp = match opcode { CmpOpcode::Cmp => true, CmpOpcode::Test => false, @@ -972,10 +990,11 @@ pub(crate) fn emit( prefix = LegacyPrefixes::_66; } // A redundant REX prefix can change the meaning of this instruction. - let mut rex = RexFlags::from((*size, reg_g.to_reg())); + let mut rex = RexFlags::from((*size, reg_g)); match src_e.clone().to_reg_mem_imm() { RegMemImm::Reg { reg: reg_e } => { + let reg_e = allocs.next(reg_e); if *size == OperandSize::Size8 { // Check whether the E register forces the use of a redundant REX. 
rex.always_emit_if_8bit_needed(reg_e); @@ -989,11 +1008,11 @@ pub(crate) fn emit( (OperandSize::Size8, false) => 0x84, (_, false) => 0x85, }; - emit_std_reg_reg(sink, prefix, opcode, 1, reg_e, reg_g.to_reg(), rex); + emit_std_reg_reg(sink, prefix, opcode, 1, reg_e, reg_g, rex); } RegMemImm::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state, sink).with_allocs(allocs); // Whereas here we revert to the "normal" G-E ordering for CMP. let opcode = match (*size, is_cmp) { (OperandSize::Size8, true) => 0x3A, @@ -1001,17 +1020,7 @@ pub(crate) fn emit( (OperandSize::Size8, false) => 0x84, (_, false) => 0x85, }; - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - 1, - reg_g.to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, 1, reg_g, addr, rex); } RegMemImm::Imm { simm32 } => { @@ -1037,7 +1046,7 @@ pub(crate) fn emit( }; let subopcode = if is_cmp { 7 } else { 0 }; - let enc_g = int_reg_enc(reg_g.to_reg()); + let enc_g = int_reg_enc(reg_g); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_g, rex); emit_simm(sink, if use_imm8 { 1 } else { size.to_bytes() }, simm32); } @@ -1045,6 +1054,7 @@ pub(crate) fn emit( } Inst::Setcc { cc, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let opcode = 0x0f90 + cc.get_enc() as u32; let mut rex_flags = RexFlags::clear_w(); rex_flags.always_emit(); @@ -1054,7 +1064,7 @@ pub(crate) fn emit( opcode, 2, 0, - reg_enc(dst.to_reg()), + reg_enc(dst), rex_flags, ); } @@ -1066,7 +1076,9 @@ pub(crate) fn emit( alternative, dst, } => { - debug_assert_eq!(*alternative, dst.to_reg()); + let alternative = allocs.next(alternative.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(alternative, dst); let rex_flags = RexFlags::from(*size); let prefix = match size { OperandSize::Size16 => LegacyPrefixes::_66, @@ -1077,29 +1089,12 @@ pub(crate) fn emit( let opcode = 0x0F40 + cc.get_enc() as u32; match consequent.clone().to_reg_mem() { RegMem::Reg { reg } => { - emit_std_reg_reg( - sink, - prefix, - opcode, - 2, - dst.to_reg().to_reg(), - reg, - rex_flags, - ); + let reg = allocs.next(reg); + emit_std_reg_reg(sink, prefix, opcode, 2, dst, reg, rex_flags); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - 2, - dst.to_reg().to_reg(), - addr, - rex_flags, - ); + let addr = &addr.finalize(state, sink).with_allocs(allocs); + emit_std_reg_mem(sink, state, info, prefix, opcode, 2, dst, addr, rex_flags); } } } @@ -1111,7 +1106,10 @@ pub(crate) fn emit( alternative, dst, } => { - debug_assert_eq!(*alternative, dst.to_reg()); + let alternative = allocs.next(alternative.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(alternative, dst); + let consequent = consequent.clone().to_reg_mem().with_allocs(allocs); // Lowering of the Select IR opcode when the input is an fcmp relies on the fact that // this doesn't clobber flags. Make sure to not do so here. 
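
The `debug_assert_eq!(src, dst)`-style checks above (in the `Not`, `Neg`, `ShiftR`, `Cmove`, and ALU arms) guard x86's two-address forms: the instruction overwrites its first source, so lowering reports that source as tied to the def, and emission merely verifies that regalloc2 honored the constraint. A rough sketch under assumed names — the real regalloc2 operand encoding differs in detail:

    #[derive(Copy, Clone, Debug, PartialEq)]
    struct Reg(u8);

    // Simplified operand descriptions, loosely modeled on regalloc2's;
    // the real crate encodes these constraints more compactly.
    enum Operand {
        Def(Reg),
        Use(Reg),
        // This use must receive the same physical register as operand `tied_to`.
        TiedUse { vreg: Reg, tied_to: usize },
    }

    // A two-address ALU op (dst := src1 OP src2) would describe itself like
    // this, forcing the allocator to put src1 and dst in one register.
    fn alu_rmi_r_operands(src1: Reg, src2: Reg, dst: Reg) -> Vec<Operand> {
        vec![
            Operand::Def(dst),
            Operand::TiedUse { vreg: src1, tied_to: 0 },
            Operand::Use(src2),
        ]
    }

    fn main() {
        let (v1, v2, v3) = (Reg(128), Reg(129), Reg(130));
        let ops = alu_rmi_r_operands(v1, v2, v3);
        // The second operand is the tied use pointing back at the def.
        match &ops[1] {
            Operand::TiedUse { vreg, tied_to } => {
                assert_eq!(*vreg, v1);
                assert_eq!(*tied_to, 0);
            }
            _ => unreachable!(),
        }
    }

This tie is also why the disassembly expectations in the test hunks further down gain a third operand (e.g. `addq %rdx, %r15, %rdx`): the pretty-printer now shows the tied source, the second source, and the destination separately, even though the machine encoding has only two operands.
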
@@ -1125,19 +1123,20 @@ pub(crate) fn emit( } else { SseOpcode::Movss }; - let inst = - Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); sink.bind_label(next); } Inst::Push64 { src } => { + let src = src.clone().to_reg_mem_imm().with_allocs(allocs); + if info.flags.enable_probestack() { sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); } - match src.clone().to_reg_mem_imm() { + match src { RegMemImm::Reg { reg } => { let enc_reg = int_reg_enc(reg); let rex = 0x40 | ((enc_reg >> 3) & 1); @@ -1175,7 +1174,8 @@ pub(crate) fn emit( } Inst::Pop64 { dst } => { - let enc_dst = int_reg_enc(dst.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let enc_dst = int_reg_enc(dst); if enc_dst >= 8 { // 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant here. sink.put1(0x41); @@ -1202,13 +1202,15 @@ pub(crate) fn emit( } Inst::CallUnknown { dest, opcode, .. } => { + let dest = dest.with_allocs(allocs); + if info.flags.enable_probestack() { sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); } let start_offset = sink.cur_offset(); match dest { RegMem::Reg { reg } => { - let reg_enc = int_reg_enc(*reg); + let reg_enc = int_reg_enc(reg); emit_std_enc_enc( sink, LegacyPrefixes::None, @@ -1244,7 +1246,7 @@ pub(crate) fn emit( } } - Inst::Ret {} => sink.put1(0xC3), + Inst::Ret { .. } => sink.put1(0xC3), Inst::JmpKnown { dst } => { let br_start = sink.cur_offset(); @@ -1305,9 +1307,11 @@ pub(crate) fn emit( } Inst::JmpUnknown { target } => { + let target = target.with_allocs(allocs); + match target { RegMem::Reg { reg } => { - let reg_enc = int_reg_enc(*reg); + let reg_enc = int_reg_enc(reg); emit_std_enc_enc( sink, LegacyPrefixes::None, @@ -1344,6 +1348,10 @@ pub(crate) fn emit( default_target, .. } => { + let idx = allocs.next(*idx); + let tmp1 = Writable::from_reg(allocs.next(tmp1.to_reg())); + let tmp2 = Writable::from_reg(allocs.next(tmp2.to_reg())); + // This sequence is *one* instruction in the vcode, and is expanded only here at // emission time, because we cannot allow the regalloc to insert spills/reloads in // the middle; we depend on hardcoded PC-rel addressing below. @@ -1368,13 +1376,13 @@ pub(crate) fn emit( one_way_jmp(sink, CC::NB, *default_target); // idx unsigned >= jmp table size // Copy the index (and make sure to clear the high 32-bits lane of tmp2). - let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2); - inst.emit(sink, info, state); + let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2); + inst.emit(&[], sink, info, state); // Load base address of jump table. let start_of_jumptable = sink.get_label(); - let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), *tmp1); - inst.emit(sink, info, state); + let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), tmp1); + inst.emit(&[], sink, info, state); // Load value out of the jump table. It's a relative offset to the target block, so it // might be negative; use a sign-extension. @@ -1386,22 +1394,22 @@ pub(crate) fn emit( Gpr::new(tmp2.to_reg()).unwrap(), 2, )), - *tmp2, + tmp2, ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Add base of jump table to jump-table-sourced block offset. 
let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(tmp2.to_reg()), - *tmp1, + tmp1, ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Branch to computed address. let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg())); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Emit jump table (table of 32-bit offsets). sink.bind_label(start_of_jumptable); @@ -1426,7 +1434,7 @@ pub(crate) fn emit( // Trap! let inst = Inst::trap(*trap_code); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); sink.bind_label(else_label); } @@ -1436,6 +1444,9 @@ pub(crate) fn emit( src: src_e, dst: reg_g, } => { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + let src_e = src_e.clone().to_reg_mem().with_allocs(allocs); + let rex = RexFlags::clear_w(); let (prefix, opcode, num_opcodes) = match op { @@ -1474,17 +1485,9 @@ pub(crate) fn emit( _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src_e.clone().to_reg_mem() { + match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg( - sink, - prefix, - opcode, - num_opcodes, - reg_g.to_reg().to_reg(), - reg_e, - rex, - ); + emit_std_reg_reg(sink, prefix, opcode, num_opcodes, reg_g, reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); @@ -1495,7 +1498,7 @@ pub(crate) fn emit( prefix, opcode, num_opcodes, - reg_g.to_reg().to_reg(), + reg_g, addr, rex, ); @@ -1504,21 +1507,24 @@ pub(crate) fn emit( } Inst::XmmUnaryRmREvex { op, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); + let src = src.clone().to_reg_mem().with_allocs(allocs); + let (prefix, map, w, opcode) = match op { Avx512Opcode::Vcvtudq2ps => (LegacyPrefixes::_F2, OpcodeMap::_0F, false, 0x7a), Avx512Opcode::Vpabsq => (LegacyPrefixes::_66, OpcodeMap::_0F38, true, 0x1f), Avx512Opcode::Vpopcntb => (LegacyPrefixes::_66, OpcodeMap::_0F38, false, 0x54), _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src.clone().to_reg_mem() { + match src { RegMem::Reg { reg: src } => EvexInstruction::new() .length(EvexVectorLength::V128) .prefix(prefix) .map(map) .w(w) .opcode(opcode) - .reg(dst.to_reg().get_hw_encoding()) - .rm(src.get_hw_encoding()) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src.to_real_reg().unwrap().hw_enc()) .encode(sink), _ => todo!(), }; @@ -1530,7 +1536,17 @@ pub(crate) fn emit( src2: src_e, dst: reg_g, } => { - debug_assert_eq!(*src1, reg_g.to_reg()); + let (src_e, reg_g) = if inst.produces_const() { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + (RegMem::Reg { reg: reg_g }, reg_g) + } else { + let src1 = allocs.next(src1.to_reg()); + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + let src_e = src_e.clone().to_reg_mem().with_allocs(allocs); + debug_assert_eq!(src1, reg_g); + (src_e, reg_g) + }; + let rex = RexFlags::clear_w(); let (prefix, opcode, length) = match op { SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2), @@ -1637,31 +1653,13 @@ pub(crate) fn emit( _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src_e.clone().to_reg_mem() { + match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg( - sink, - prefix, - opcode, - length, - reg_g.to_reg().to_reg(), - reg_e, - rex, - ); + emit_std_reg_reg(sink, prefix, opcode, length, reg_g, reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - length, - reg_g.to_reg().to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, 
opcode, length, reg_g, addr, rex); } } } @@ -1672,21 +1670,25 @@ pub(crate) fn emit( src2, dst, } => { + let dst = allocs.next(dst.to_reg().to_reg()); + let src2 = allocs.next(src2.to_reg()); + let src1 = src1.clone().to_reg_mem().with_allocs(allocs); + let (w, opcode) = match op { Avx512Opcode::Vpermi2b => (false, 0x75), Avx512Opcode::Vpmullq => (true, 0x40), _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src1.clone().to_reg_mem() { + match src1 { RegMem::Reg { reg: src } => EvexInstruction::new() .length(EvexVectorLength::V128) .prefix(LegacyPrefixes::_66) .map(OpcodeMap::_0F38) .w(w) .opcode(opcode) - .reg(dst.to_reg().get_hw_encoding()) - .rm(src.get_hw_encoding()) - .vvvvv(src2.get_hw_encoding()) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src.to_real_reg().unwrap().hw_enc()) + .vvvvv(src2.to_real_reg().unwrap().hw_enc()) .encode(sink), _ => todo!(), }; @@ -1699,7 +1701,10 @@ pub(crate) fn emit( rhs, dst, } => { - debug_assert_eq!(*rhs, dst.to_reg()); + let rhs = allocs.next(rhs.to_reg()); + let lhs = allocs.next(lhs.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(rhs, dst); // Generates the following sequence: // cmpss/cmpsd %lhs, %rhs_dst @@ -1750,8 +1755,8 @@ pub(crate) fn emit( _ => unreachable!(), }; - let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(lhs.to_reg()), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(lhs), dst); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NZ, do_min_max); one_way_jmp(sink, CC::P, propagate_nan); @@ -1760,24 +1765,25 @@ pub(crate) fn emit( // and negative zero. These instructions merge the sign bits in that // case, and are no-ops otherwise. let op = if *is_min { or_op } else { and_op }; - let inst = Inst::xmm_rm_r(op, RegMem::reg(lhs.to_reg()), dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(op, RegMem::reg(lhs), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // x86's min/max are not symmetric; if either operand is a NaN, they return the // read-only operand: perform an addition between the two operands, which has the // desired NaN propagation effects. 
sink.bind_label(propagate_nan); - let inst = Inst::xmm_rm_r(add_op, RegMem::reg(lhs.to_reg()), dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(add_op, RegMem::reg(lhs), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::P, done); sink.bind_label(do_min_max); - let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(lhs.to_reg()), dst.to_writable_reg()); - inst.emit(sink, info, state); + + let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(lhs), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); sink.bind_label(done); } @@ -1790,7 +1796,21 @@ pub(crate) fn emit( imm, size, } => { - debug_assert_eq!(*src1, dst.to_reg()); + let (src2, dst) = if inst.produces_const() { + let dst = allocs.next(dst.to_reg()); + (RegMem::Reg { reg: dst }, dst) + } else if !op.uses_src1() { + let dst = allocs.next(dst.to_reg()); + let src2 = src2.with_allocs(allocs); + (src2, dst) + } else { + let src1 = allocs.next(*src1); + let dst = allocs.next(dst.to_reg()); + let src2 = src2.with_allocs(allocs); + debug_assert_eq!(src1, dst); + (src2, dst) + }; + let (prefix, opcode, len) = match op { SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2), SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2), @@ -1824,9 +1844,9 @@ pub(crate) fn emit( match src2 { RegMem::Reg { reg } => { if regs_swapped { - emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex); + emit_std_reg_reg(sink, prefix, opcode, len, reg, dst, rex); } else { - emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), *reg, rex); + emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex); } } RegMem::Mem { addr } => { @@ -1835,26 +1855,17 @@ pub(crate) fn emit( !regs_swapped, "No existing way to encode a mem argument in the ModRM r/m field." ); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - len, - dst.to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, len, dst, addr, rex); } } sink.put1(*imm); } Inst::XmmLoadConst { src, dst, ty } => { + let dst = allocs.next(dst.to_reg()); let load_offset = Amode::rip_relative(sink.get_label_for_constant(*src)); - let load = Inst::load(*ty, load_offset, *dst, ExtKind::None); - load.emit(sink, info, state); + let load = Inst::load(*ty, load_offset, Writable::from_reg(dst), ExtKind::None); + load.emit(&[], sink, info, state); } Inst::XmmUninitializedValue { .. 
} => { @@ -1863,6 +1874,9 @@ pub(crate) fn emit( } Inst::XmmMovRM { op, src, dst } => { + let src = allocs.next(*src); + let dst = dst.with_allocs(allocs); + let (prefix, opcode) = match op { SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29), SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29), @@ -1881,7 +1895,7 @@ pub(crate) fn emit( prefix, opcode, 2, - *src, + src, dst, RexFlags::clear_w(), ); @@ -1893,6 +1907,9 @@ pub(crate) fn emit( dst, dst_size, } => { + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let (prefix, opcode, dst_first) = match op { SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true), SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true), @@ -1905,11 +1922,7 @@ pub(crate) fn emit( _ => panic!("unexpected opcode {:?}", op), }; let rex = RexFlags::from(*dst_size); - let (src, dst) = if dst_first { - (dst.to_reg().to_reg(), src.to_reg()) - } else { - (src.to_reg(), dst.to_reg().to_reg()) - }; + let (src, dst) = if dst_first { (dst, src) } else { (src, dst) }; emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex); } @@ -1920,6 +1933,9 @@ pub(crate) fn emit( dst: reg_g, src_size, } => { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + let src_e = src_e.clone().to_reg_mem().with_allocs(allocs); + let (prefix, opcode) = match op { // Movd and movq use the same opcode; the presence of the REX prefix (set below) // actually determines which is used. @@ -1929,28 +1945,21 @@ pub(crate) fn emit( _ => panic!("unexpected opcode {:?}", op), }; let rex = RexFlags::from(*src_size); - match src_e.clone().to_reg_mem() { + match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg().to_reg(), reg_e, rex); + emit_std_reg_reg(sink, prefix, opcode, 2, reg_g, reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - 2, - reg_g.to_reg().to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, 2, reg_g, addr, rex); } } } Inst::XmmCmpRmR { op, src, dst } => { + let dst = allocs.next(dst.to_reg()); + let src = src.clone().to_reg_mem().with_allocs(allocs); + let rex = RexFlags::clear_w(); let (prefix, opcode, len) = match op { SseOpcode::Ptest => (LegacyPrefixes::_66, 0x0F3817, 3), @@ -1959,23 +1968,13 @@ pub(crate) fn emit( _ => unimplemented!("Emit xmm cmp rm r"), }; - match src.clone().to_reg_mem() { + match src { RegMem::Reg { reg } => { - emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), reg, rex); + emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - len, - dst.to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, len, dst, addr, rex); } } } @@ -1987,6 +1986,11 @@ pub(crate) fn emit( tmp_gpr1, tmp_gpr2, } => { + let src = allocs.next(src.to_reg().to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let tmp_gpr1 = allocs.next(tmp_gpr1.to_reg().to_reg()); + let tmp_gpr2 = allocs.next(tmp_gpr2.to_reg().to_reg()); + // Note: this sequence is specific to 64-bit mode; a 32-bit mode would require a // different sequence. // @@ -2021,12 +2025,8 @@ pub(crate) fn emit( // If x seen as a signed int64 is not negative, a signed-conversion will do the right // thing. // TODO use tst src, src here. 
- let inst = Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::imm(0), - src.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::imm(0), src); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::L, handle_negative); @@ -2036,63 +2036,55 @@ pub(crate) fn emit( sink, info, state, - src.to_reg().to_reg(), - dst.to_writable_reg(), + src, + Writable::from_reg(dst), *dst_size == OperandSize::Size64, ); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); sink.bind_label(handle_negative); // Divide x by two to get it in range for the signed conversion, keep the LSB, and // scale it back up on the FP side. - let inst = Inst::gen_move( - tmp_gpr1.to_writable_reg(), - src.to_reg().to_reg(), - types::I64, - ); - inst.emit(sink, info, state); + let inst = Inst::gen_move(Writable::from_reg(tmp_gpr1), src, types::I64); + inst.emit(&[], sink, info, state); // tmp_gpr1 := src >> 1 let inst = Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, Some(1), - tmp_gpr1.to_writable_reg(), + Writable::from_reg(tmp_gpr1), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::gen_move( - tmp_gpr2.to_writable_reg(), - src.to_reg().to_reg(), - types::I64, - ); - inst.emit(sink, info, state); + let inst = Inst::gen_move(Writable::from_reg(tmp_gpr2), src, types::I64); + inst.emit(&[], sink, info, state); let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(1), - tmp_gpr2.to_writable_reg(), + Writable::from_reg(tmp_gpr2), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::Or, - RegMemImm::reg(tmp_gpr1.to_reg().to_reg()), - tmp_gpr2.to_writable_reg(), + RegMemImm::reg(tmp_gpr1), + Writable::from_reg(tmp_gpr2), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); emit_signed_cvt( sink, info, state, - tmp_gpr2.to_reg().to_reg(), - dst.to_writable_reg(), + tmp_gpr2, + Writable::from_reg(dst), *dst_size == OperandSize::Size64, ); @@ -2101,12 +2093,8 @@ pub(crate) fn emit( } else { SseOpcode::Addss }; - let inst = Inst::xmm_rm_r( - add_op, - RegMem::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); sink.bind_label(done); } @@ -2120,6 +2108,11 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { + let src = allocs.next(src.to_reg().to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); + let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); + // Emits the following common sequence: // // cvttss2si/cvttsd2si %src, %dst @@ -2166,8 +2159,6 @@ pub(crate) fn emit( // // done: - let src = src.to_reg(); - let (cast_op, cmp_op, trunc_op) = match src_size { OperandSize::Size64 => (SseOpcode::Movq, SseOpcode::Ucomisd, SseOpcode::Cvttsd2si), OperandSize::Size32 => (SseOpcode::Movd, SseOpcode::Ucomiss, SseOpcode::Cvttss2si), @@ -2178,19 +2169,19 @@ pub(crate) fn emit( let not_nan = sink.get_label(); // The truncation. - let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), dst.to_writable_reg(), *dst_size); - inst.emit(sink, info, state); + let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size); + inst.emit(&[], sink, info, state); // Compare against 1, in case of overflow the dst operand was INT_MIN. 
- let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NO, done); // no overflow => done // Check for NaN. - let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src.to_reg()), src.to_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NP, not_nan); // go to not_nan if not a NaN @@ -2199,13 +2190,13 @@ pub(crate) fn emit( let inst = Inst::alu_rmi_r( *dst_size, AluRmiROpcode::Xor, - RegMemImm::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(dst), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); sink.bind_label(not_nan); @@ -2214,17 +2205,13 @@ pub(crate) fn emit( // Zero out tmp_xmm. let inst = Inst::xmm_rm_r( SseOpcode::Xorpd, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - tmp_xmm.to_writable_reg(), + RegMem::reg(tmp_xmm), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(src.to_reg()), - tmp_xmm.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm); + inst.emit(&[], sink, info, state); // Jump if >= to done. one_way_jmp(sink, CC::NB, done); @@ -2234,18 +2221,18 @@ pub(crate) fn emit( let inst = Inst::imm( OperandSize::Size64, 0x7fffffffffffffff, - dst.to_writable_reg(), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { - let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); } } else { let check_positive = sink.get_label(); let inst = Inst::trap(TrapCode::BadConversionToInteger); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Check if INT_MIN was the correct result: determine the smallest floating point // number that would convert to INT_MIN, put it in a temporary register, and compare @@ -2261,8 +2248,8 @@ pub(crate) fn emit( OperandSize::Size32 => { let cst = Ieee32::pow2(output_bits - 1).neg().bits(); let inst = - Inst::imm(OperandSize::Size32, cst as u64, tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + Inst::imm(OperandSize::Size32, cst as u64, Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); } OperandSize::Size64 => { // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, @@ -2274,32 +2261,28 @@ pub(crate) fn emit( Ieee64::pow2(output_bits - 1).neg() }; let inst = - Inst::imm(OperandSize::Size64, cst.bits(), tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + Inst::imm(OperandSize::Size64, cst.bits(), Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); } _ => unreachable!(), } let inst = Inst::gpr_to_xmm( cast_op, - RegMem::reg(tmp_gpr.to_reg().to_reg()), + RegMem::reg(tmp_gpr), *src_size, - tmp_xmm.to_writable_reg(), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - src.to_reg(), - 
); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm), src); + inst.emit(&[], sink, info, state); // jump over trap if src >= or > threshold one_way_jmp(sink, no_overflow_cc, check_positive); let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // If positive, it was a real overflow. @@ -2308,22 +2291,18 @@ pub(crate) fn emit( // Zero out the tmp_xmm register. let inst = Inst::xmm_rm_r( SseOpcode::Xorpd, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - tmp_xmm.to_writable_reg(), + RegMem::reg(tmp_xmm), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(src.to_reg()), - tmp_xmm.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NB, done); // jump over trap if 0 >= src let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(done); @@ -2338,6 +2317,11 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { + let src = allocs.next(src.to_reg().to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); + let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); + // The only difference in behavior between saturating and non-saturating is how we // handle errors. Emits the following sequence: // @@ -2397,23 +2381,19 @@ pub(crate) fn emit( _ => unreachable!(), }; - let inst = Inst::imm(*src_size, cst, tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::imm(*src_size, cst, Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); let inst = Inst::gpr_to_xmm( cast_op, - RegMem::reg(tmp_gpr.to_reg().to_reg()), + RegMem::reg(tmp_gpr), *src_size, - tmp_xmm.to_writable_reg(), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - src.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm), src); + inst.emit(&[], sink, info, state); let handle_large = sink.get_label(); one_way_jmp(sink, CC::NB, handle_large); // jump to handle_large if src >= large_threshold @@ -2426,17 +2406,17 @@ pub(crate) fn emit( let inst = Inst::alu_rmi_r( *dst_size, AluRmiROpcode::Xor, - RegMemImm::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(dst), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { // Trap. let inst = Inst::trap(TrapCode::BadConversionToInteger); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(not_nan); @@ -2444,16 +2424,11 @@ pub(crate) fn emit( // Actual truncation for small inputs: if the result is not positive, then we had an // overflow. 
- let inst = Inst::xmm_to_gpr( - trunc_op, - src.to_reg().to_reg(), - dst.to_writable_reg(), - *dst_size, - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size); + inst.emit(&[], sink, info, state); - let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NL, done); // if dst >= 0, jump to done @@ -2463,40 +2438,31 @@ pub(crate) fn emit( let inst = Inst::alu_rmi_r( *dst_size, AluRmiROpcode::Xor, - RegMemImm::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(dst), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { // Trap. let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } // Now handle large inputs. sink.bind_label(handle_large); - let inst = Inst::xmm_rm_r( - sub_op, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - src.to_writable_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm), Writable::from_reg(src)); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_to_gpr( - trunc_op, - src.to_reg().to_reg(), - dst.to_writable_reg(), - *dst_size, - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size); + inst.emit(&[], sink, info, state); - let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst); + inst.emit(&[], sink, info, state); let next_is_large = sink.get_label(); one_way_jmp(sink, CC::NL, next_is_large); // if dst >= 0, jump to next_is_large @@ -2511,47 +2477,49 @@ pub(crate) fn emit( } else { u32::max_value() as u64 }, - dst.to_writable_reg(), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(next_is_large); if *dst_size == OperandSize::Size64 { - let inst = Inst::imm(OperandSize::Size64, 1 << 63, tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::imm(OperandSize::Size64, 1 << 63, Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::Add, - RegMemImm::reg(tmp_gpr.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(tmp_gpr), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { let inst = Inst::alu_rmi_r( OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::imm(1 << 31), - dst.to_writable_reg(), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(done); } Inst::LoadExtName { dst, name, offset } => { + let dst = allocs.next(dst.to_reg()); + if info.flags.is_pic() { // Generates: movq symbol@GOTPCREL(%rip), %dst - let enc_dst = int_reg_enc(dst.to_reg()); + let enc_dst = int_reg_enc(dst); sink.put1(0x48 | ((enc_dst >> 3) & 1) << 2); sink.put1(0x8B); 
sink.put1(0x05 | ((enc_dst & 7) << 3)); @@ -2575,7 +2543,7 @@ pub(crate) fn emit( } else { // The full address can be encoded in the register, with a relocation. // Generates: movabsq $name, %dst - let enc_dst = int_reg_enc(dst.to_reg()); + let enc_dst = int_reg_enc(dst); sink.put1(0x48 | ((enc_dst >> 3) & 1)); sink.put1(0xB8 | (enc_dst & 7)); emit_reloc(sink, state, Reloc::Abs8, name, *offset); @@ -2594,8 +2562,13 @@ pub(crate) fn emit( mem, dst_old, } => { - debug_assert_eq!(*expected, regs::rax()); - debug_assert_eq!(dst_old.to_reg(), regs::rax()); + let replacement = allocs.next(*replacement); + let expected = allocs.next(*expected); + let dst_old = allocs.next(dst_old.to_reg()); + let mem = mem.with_allocs(allocs); + + debug_assert_eq!(expected, regs::rax()); + debug_assert_eq!(dst_old, regs::rax()); // lock cmpxchg{b,w,l,q} %replacement, (mem) // Note that 0xF0 is the Lock prefix. @@ -2606,7 +2579,7 @@ pub(crate) fn emit( types::I64 => (LegacyPrefixes::_F0, 0x0FB1), _ => unreachable!(), }; - let rex = RexFlags::from((OperandSize::from_ty(*ty), *replacement)); + let rex = RexFlags::from((OperandSize::from_ty(*ty), replacement)); let amode = mem.finalize(state, sink); emit_std_reg_mem( sink, @@ -2615,7 +2588,7 @@ pub(crate) fn emit( prefix, opcodes, 2, - *replacement, + replacement, &amode, rex, ); @@ -2629,6 +2602,7 @@ pub(crate) fn emit( temp, dst_old, } => { + // FIXME: use real vregs for this seq. debug_assert_eq!(*address, regs::r9()); debug_assert_eq!(*operand, regs::r10()); debug_assert_eq!(temp.to_reg(), regs::r11()); @@ -2662,31 +2636,31 @@ pub(crate) fn emit( // mov{zbq,zwq,zlq,q} (%r9), %rax // No need to call `add_trap` here, since the `i1` emit will do that. let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend); - i1.emit(sink, info, state); + i1.emit(&[], sink, info, state); // again: sink.bind_label(again_label); // movq %rax, %r11 let i2 = Inst::mov_r_r(OperandSize::Size64, rax, r11_w); - i2.emit(sink, info, state); + i2.emit(&[], sink, info, state); let r10_rmi = RegMemImm::reg(r10); match op { inst_common::AtomicRmwOp::Xchg => { // movq %r10, %r11 let i3 = Inst::mov_r_r(OperandSize::Size64, r10, r11_w); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); } inst_common::AtomicRmwOp::Nand => { // andq %r10, %r11 let i3 = Inst::alu_rmi_r(OperandSize::Size64, AluRmiROpcode::And, r10_rmi, r11_w); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); // notq %r11 let i4 = Inst::not(OperandSize::Size64, r11_w); - i4.emit(sink, info, state); + i4.emit(&[], sink, info, state); } inst_common::AtomicRmwOp::Umin | inst_common::AtomicRmwOp::Umax @@ -2694,7 +2668,7 @@ pub(crate) fn emit( | inst_common::AtomicRmwOp::Smax => { // cmp %r11, %r10 let i3 = Inst::cmp_rmi_r(OperandSize::from_ty(*ty), RegMemImm::reg(r11), r10); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); // cmovcc %r10, %r11 let cc = match op { @@ -2705,7 +2679,7 @@ pub(crate) fn emit( _ => unreachable!(), }; let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(r10), r11_w); - i4.emit(sink, info, state); + i4.emit(&[], sink, info, state); } _ => { // opq %r10, %r11 @@ -2723,7 +2697,7 @@ pub(crate) fn emit( | inst_common::AtomicRmwOp::Smax => unreachable!(), }; let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, r10_rmi, r11_w); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); } } @@ -2736,7 +2710,7 @@ pub(crate) fn emit( mem: amode.into(), dst_old: Writable::from_reg(regs::rax()), }; - i4.emit(sink, info, state); + i4.emit(&[], sink, info, 
state); // jnz again one_way_jmp(sink, CC::NZ, again_label); @@ -2902,13 +2876,13 @@ pub(crate) fn emit( sink.put1(0x17); } - Inst::ValueLabelMarker { .. } => { - // Nothing; this is only used to compute debug info. - } - Inst::Unwind { ref inst } => { sink.add_unwind(inst.clone()); } + + Inst::DummyUse { .. } => { + // Nothing. + } } state.clear_post_insn(); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 154992f0fb..95f0fd2049 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -19,7 +19,7 @@ use alloc::vec::Vec; impl Inst { fn neg(size: OperandSize, src: Writable) -> Inst { - debug_assert_eq!(src.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(src.to_reg().class(), RegClass::Int); Inst::Neg { size, src: Gpr::new(src.to_reg()).unwrap(), @@ -1085,7 +1085,7 @@ fn test_x64_emit() { w_rdx, ), "4C01FA", - "addq %r15, %rdx", + "addq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1095,7 +1095,7 @@ fn test_x64_emit() { w_r8, ), "4101C8", - "addl %ecx, %r8d", + "addl %r8d, %ecx, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1105,7 +1105,7 @@ fn test_x64_emit() { w_rsi, ), "01CE", - "addl %ecx, %esi", + "addl %esi, %ecx, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1115,7 +1115,7 @@ fn test_x64_emit() { w_rdx, ), "48035763", - "addq 99(%rdi), %rdx", + "addq %rdx, 99(%rdi), %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1125,7 +1125,7 @@ fn test_x64_emit() { w_r8, ), "44034763", - "addl 99(%rdi), %r8d", + "addl %r8d, 99(%rdi), %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1135,7 +1135,7 @@ fn test_x64_emit() { w_rsi, ), "037763", - "addl 99(%rdi), %esi", + "addl %esi, 99(%rdi), %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1145,7 +1145,7 @@ fn test_x64_emit() { w_rdx, ), "4883C281", - "addq $-127, %rdx", + "addq %rdx, $-127, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1155,7 +1155,7 @@ fn test_x64_emit() { w_rdx, ), "4881C27FFFFFFF", - "addq $-129, %rdx", + "addq %rdx, $-129, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1165,7 +1165,7 @@ fn test_x64_emit() { w_rdx, ), "4881C2EAF48F04", - "addq $76543210, %rdx", + "addq %rdx, $76543210, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1175,7 +1175,7 @@ fn test_x64_emit() { w_r8, ), "4183C081", - "addl $-127, %r8d", + "addl %r8d, $-127, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1185,7 +1185,7 @@ fn test_x64_emit() { w_r8, ), "4181C07FFFFFFF", - "addl $-129, %r8d", + "addl %r8d, $-129, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1195,7 +1195,7 @@ fn test_x64_emit() { w_r8, ), "4181C0160B70FB", - "addl $-76543210, %r8d", + "addl %r8d, $-76543210, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1205,7 +1205,7 @@ fn test_x64_emit() { w_rsi, ), "83C681", - "addl $-127, %esi", + "addl %esi, $-127, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1215,7 +1215,7 @@ fn test_x64_emit() { w_rsi, ), "81C67FFFFFFF", - "addl $-129, %esi", + "addl %esi, $-129, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1225,7 +1225,7 @@ fn test_x64_emit() { w_rsi, ), "81C6EAF48F04", - "addl $76543210, %esi", + "addl %esi, $76543210, %esi", )); // This is pretty feeble insns.push(( @@ -1236,7 +1236,7 @@ fn test_x64_emit() { w_rdx, ), "4C29FA", - "subq %r15, %rdx", + "subq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1246,7 +1246,7 @@ fn test_x64_emit() { w_rdx, ), "4C21FA", - "andq %r15, %rdx", + "andq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1256,7 +1256,7 @@ fn test_x64_emit() { w_rdx, ), "4C09FA", - "orq 
%r15, %rdx", + "orq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1266,7 +1266,7 @@ fn test_x64_emit() { w_rdx, ), "4420FA", - "andb %r15b, %dl", + "andb %dl, %r15b, %dl", )); insns.push(( Inst::alu_rmi_r( @@ -1276,7 +1276,7 @@ fn test_x64_emit() { w_rsi, ), "4020C6", - "andb %al, %sil", + "andb %sil, %al, %sil", )); insns.push(( Inst::alu_rmi_r( @@ -1286,7 +1286,7 @@ fn test_x64_emit() { w_rbx, ), "20C3", - "andb %al, %bl", + "andb %bl, %al, %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1296,7 +1296,7 @@ fn test_x64_emit() { w_rbx, ), "2218", - "andb 0(%rax), %bl", + "andb %bl, 0(%rax), %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1306,7 +1306,7 @@ fn test_x64_emit() { w_rdx, ), "4408FA", - "orb %r15b, %dl", + "orb %dl, %r15b, %dl", )); insns.push(( Inst::alu_rmi_r( @@ -1316,7 +1316,7 @@ fn test_x64_emit() { w_rsi, ), "4008C6", - "orb %al, %sil", + "orb %sil, %al, %sil", )); insns.push(( Inst::alu_rmi_r( @@ -1326,7 +1326,7 @@ fn test_x64_emit() { w_rbx, ), "08C3", - "orb %al, %bl", + "orb %bl, %al, %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1336,7 +1336,7 @@ fn test_x64_emit() { w_rbx, ), "0A18", - "orb 0(%rax), %bl", + "orb %bl, 0(%rax), %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1346,7 +1346,7 @@ fn test_x64_emit() { w_rdx, ), "4C31FA", - "xorq %r15, %rdx", + "xorq %rdx, %r15, %rdx", )); // Test all mul cases, though insns.push(( @@ -1357,7 +1357,7 @@ fn test_x64_emit() { w_rdx, ), "490FAFD7", - "imulq %r15, %rdx", + "imulq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1367,7 +1367,7 @@ fn test_x64_emit() { w_r8, ), "440FAFC1", - "imull %ecx, %r8d", + "imull %r8d, %ecx, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1377,7 +1377,7 @@ fn test_x64_emit() { w_rsi, ), "0FAFF1", - "imull %ecx, %esi", + "imull %esi, %ecx, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1387,7 +1387,7 @@ fn test_x64_emit() { w_rdx, ), "480FAF5763", - "imulq 99(%rdi), %rdx", + "imulq %rdx, 99(%rdi), %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1397,7 +1397,7 @@ fn test_x64_emit() { w_r8, ), "440FAF4763", - "imull 99(%rdi), %r8d", + "imull %r8d, 99(%rdi), %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1407,7 +1407,7 @@ fn test_x64_emit() { w_rsi, ), "0FAF7763", - "imull 99(%rdi), %esi", + "imull %esi, 99(%rdi), %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1417,7 +1417,7 @@ fn test_x64_emit() { w_rdx, ), "486BD281", - "imulq $-127, %rdx", + "imulq %rdx, $-127, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1427,7 +1427,7 @@ fn test_x64_emit() { w_rdx, ), "4869D27FFFFFFF", - "imulq $-129, %rdx", + "imulq %rdx, $-129, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1437,7 +1437,7 @@ fn test_x64_emit() { w_rdx, ), "4869D2EAF48F04", - "imulq $76543210, %rdx", + "imulq %rdx, $76543210, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1447,7 +1447,7 @@ fn test_x64_emit() { w_r8, ), "456BC081", - "imull $-127, %r8d", + "imull %r8d, $-127, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1457,7 +1457,7 @@ fn test_x64_emit() { w_r8, ), "4569C07FFFFFFF", - "imull $-129, %r8d", + "imull %r8d, $-129, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1467,7 +1467,7 @@ fn test_x64_emit() { w_r8, ), "4569C0160B70FB", - "imull $-76543210, %r8d", + "imull %r8d, $-76543210, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1477,7 +1477,7 @@ fn test_x64_emit() { w_rsi, ), "6BF681", - "imull $-127, %esi", + "imull %esi, $-127, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1487,7 +1487,7 @@ fn test_x64_emit() { w_rsi, ), "69F67FFFFFFF", - "imull $-129, %esi", + "imull %esi, $-129, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1497,7 +1497,7 @@ 
fn test_x64_emit() { w_rsi, ), "69F6EAF48F04", - "imull $76543210, %esi", + "imull %esi, $76543210, %esi", )); // ======================================================== @@ -1529,32 +1529,32 @@ fn test_x64_emit() { insns.push(( Inst::not(OperandSize::Size32, Writable::from_reg(regs::rsi())), "F7D6", - "notl %esi", + "notl %esi, %esi", )); insns.push(( Inst::not(OperandSize::Size64, Writable::from_reg(regs::r15())), "49F7D7", - "notq %r15", + "notq %r15, %r15", )); insns.push(( Inst::not(OperandSize::Size32, Writable::from_reg(regs::r14())), "41F7D6", - "notl %r14d", + "notl %r14d, %r14d", )); insns.push(( Inst::not(OperandSize::Size16, Writable::from_reg(regs::rdi())), "66F7D7", - "notw %di", + "notw %di, %di", )); insns.push(( Inst::not(OperandSize::Size8, Writable::from_reg(regs::rdi())), "40F6D7", - "notb %dil", + "notb %dil, %dil", )); insns.push(( Inst::not(OperandSize::Size8, Writable::from_reg(regs::rax())), "F6D0", - "notb %al", + "notb %al, %al", )); // ======================================================== @@ -1562,32 +1562,32 @@ fn test_x64_emit() { insns.push(( Inst::neg(OperandSize::Size32, Writable::from_reg(regs::rsi())), "F7DE", - "negl %esi", + "negl %esi, %esi", )); insns.push(( Inst::neg(OperandSize::Size64, Writable::from_reg(regs::r15())), "49F7DF", - "negq %r15", + "negq %r15, %r15", )); insns.push(( Inst::neg(OperandSize::Size32, Writable::from_reg(regs::r14())), "41F7DE", - "negl %r14d", + "negl %r14d, %r14d", )); insns.push(( Inst::neg(OperandSize::Size16, Writable::from_reg(regs::rdi())), "66F7DF", - "negw %di", + "negw %di, %di", )); insns.push(( Inst::neg(OperandSize::Size8, Writable::from_reg(regs::rdi())), "40F6DF", - "negb %dil", + "negb %dil, %dil", )); insns.push(( Inst::neg(OperandSize::Size8, Writable::from_reg(regs::rax())), "F6D8", - "negb %al", + "negb %al, %al", )); // ======================================================== @@ -1599,7 +1599,7 @@ fn test_x64_emit() { RegMem::reg(regs::rsi()), ), "F7FE", - "idiv %esi", + "idiv %eax, %edx, %esi, %eax, %edx", )); insns.push(( Inst::div( @@ -1608,7 +1608,7 @@ fn test_x64_emit() { RegMem::reg(regs::r15()), ), "49F7FF", - "idiv %r15", + "idiv %rax, %rdx, %r15, %rax, %rdx", )); insns.push(( Inst::div( @@ -1617,7 +1617,7 @@ fn test_x64_emit() { RegMem::reg(regs::r14()), ), "41F7F6", - "div %r14d", + "div %eax, %edx, %r14d, %eax, %edx", )); insns.push(( Inst::div( @@ -1626,17 +1626,17 @@ fn test_x64_emit() { RegMem::reg(regs::rdi()), ), "48F7F7", - "div %rdi", + "div %rax, %rdx, %rdi, %rax, %rdx", )); insns.push(( Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rax())), "F6F0", - "div %al", + "div %al, %dl, %al, %al, %dl", )); insns.push(( Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rsi())), "40F6F6", - "div %sil", + "div %al, %dl, %sil, %al, %dl", )); // ======================================================== @@ -1648,7 +1648,7 @@ fn test_x64_emit() { RegMem::reg(regs::rsi()), ), "F7EE", - "imul %esi", + "imul %eax, %esi, %eax, %edx", )); insns.push(( Inst::mul_hi( @@ -1657,7 +1657,7 @@ fn test_x64_emit() { RegMem::reg(regs::r15()), ), "49F7EF", - "imul %r15", + "imul %rax, %r15, %rax, %rdx", )); insns.push(( Inst::mul_hi( @@ -1666,7 +1666,7 @@ fn test_x64_emit() { RegMem::reg(regs::r14()), ), "41F7E6", - "mul %r14d", + "mul %eax, %r14d, %eax, %edx", )); insns.push(( Inst::mul_hi( @@ -1675,18 +1675,34 @@ fn test_x64_emit() { RegMem::reg(regs::rdi()), ), "48F7E7", - "mul %rdi", + "mul %rax, %rdi, %rax, %rdx", )); // ======================================================== // cbw - 
insns.push((Inst::sign_extend_data(OperandSize::Size8), "6698", "cbw")); + insns.push(( + Inst::sign_extend_data(OperandSize::Size8), + "6698", + "cbw %al, %dl", + )); // ======================================================== // cdq family: SignExtendRaxRdx - insns.push((Inst::sign_extend_data(OperandSize::Size16), "6699", "cwd")); - insns.push((Inst::sign_extend_data(OperandSize::Size32), "99", "cdq")); - insns.push((Inst::sign_extend_data(OperandSize::Size64), "4899", "cqo")); + insns.push(( + Inst::sign_extend_data(OperandSize::Size16), + "6699", + "cwd %ax, %dx", + )); + insns.push(( + Inst::sign_extend_data(OperandSize::Size32), + "99", + "cdq %eax, %edx", + )); + insns.push(( + Inst::sign_extend_data(OperandSize::Size64), + "4899", + "cqo %rax, %rdx", + )); // ======================================================== // Imm_R @@ -2096,7 +2112,10 @@ fn test_x64_emit() { "lea 179(%r10,%r9,1), %r8", )); insns.push(( - Inst::lea(Amode::rip_relative(MachLabel::from_block(0)), w_rdi), + Inst::lea( + Amode::rip_relative(MachLabel::from_block(BlockIndex::new(0))), + w_rdi, + ), "488D3D00000000", "lea label0(%rip), %rdi", )); @@ -2670,47 +2689,47 @@ fn test_x64_emit() { insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_rdi), "D3E7", - "shll %cl, %edi", + "shll %cl, %edi, %edi", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_r12), "41D3E4", - "shll %cl, %r12d", + "shll %cl, %r12d, %r12d", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(2), w_r8), "41C1E002", - "shll $2, %r8d", + "shll $2, %r8d, %r8d", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(31), w_r13), "41C1E51F", - "shll $31, %r13d", + "shll $31, %r13d, %r13d", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_r13), "49D3E5", - "shlq %cl, %r13", + "shlq %cl, %r13, %r13", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_rdi), "48D3E7", - "shlq %cl, %rdi", + "shlq %cl, %rdi, %rdi", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(2), w_r8), "49C1E002", - "shlq $2, %r8", + "shlq $2, %r8, %r8", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(3), w_rbx), "48C1E303", - "shlq $3, %rbx", + "shlq $3, %rbx, %rbx", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(63), w_r13), "49C1E53F", - "shlq $63, %r13", + "shlq $63, %r13, %r13", )); insns.push(( Inst::shift_r( @@ -2720,7 +2739,7 @@ fn test_x64_emit() { w_rdi, ), "D3EF", - "shrl %cl, %edi", + "shrl %cl, %edi, %edi", )); insns.push(( Inst::shift_r( @@ -2730,7 +2749,7 @@ fn test_x64_emit() { w_r8, ), "41C1E802", - "shrl $2, %r8d", + "shrl $2, %r8d, %r8d", )); insns.push(( Inst::shift_r( @@ -2740,7 +2759,7 @@ fn test_x64_emit() { w_r13, ), "41C1ED1F", - "shrl $31, %r13d", + "shrl $31, %r13d, %r13d", )); insns.push(( Inst::shift_r( @@ -2750,7 +2769,7 @@ fn test_x64_emit() { w_rdi, ), "48D3EF", - "shrq %cl, %rdi", + "shrq %cl, %rdi, %rdi", )); insns.push(( Inst::shift_r( @@ -2760,7 +2779,7 @@ fn test_x64_emit() { w_r8, ), "49C1E802", - "shrq $2, %r8", + "shrq $2, %r8, %r8", )); insns.push(( Inst::shift_r( @@ -2770,7 +2789,7 @@ fn test_x64_emit() { w_r13, ), "49C1ED3F", - "shrq $63, %r13", + "shrq $63, %r13, %r13", )); insns.push(( Inst::shift_r( @@ -2780,7 +2799,7 @@ fn test_x64_emit() { w_rdi, ), "D3FF", - "sarl %cl, %edi", + "sarl %cl, %edi, %edi", )); insns.push(( Inst::shift_r( @@ -2790,7 +2809,7 @@ fn 
test_x64_emit() { w_r8, ), "41C1F802", - "sarl $2, %r8d", + "sarl $2, %r8d, %r8d", )); insns.push(( Inst::shift_r( @@ -2800,7 +2819,7 @@ fn test_x64_emit() { w_r13, ), "41C1FD1F", - "sarl $31, %r13d", + "sarl $31, %r13d, %r13d", )); insns.push(( Inst::shift_r( @@ -2810,7 +2829,7 @@ fn test_x64_emit() { w_rdi, ), "48D3FF", - "sarq %cl, %rdi", + "sarq %cl, %rdi, %rdi", )); insns.push(( Inst::shift_r( @@ -2820,7 +2839,7 @@ fn test_x64_emit() { w_r8, ), "49C1F802", - "sarq $2, %r8", + "sarq $2, %r8, %r8", )); insns.push(( Inst::shift_r( @@ -2830,52 +2849,52 @@ fn test_x64_emit() { w_r13, ), "49C1FD3F", - "sarq $63, %r13", + "sarq $63, %r13, %r13", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::RotateLeft, None, w_r8), "49D3C0", - "rolq %cl, %r8", + "rolq %cl, %r8, %r8", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::RotateLeft, Some(3), w_r9), "41C1C103", - "roll $3, %r9d", + "roll $3, %r9d, %r9d", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::RotateRight, None, w_rsi), "D3CE", - "rorl %cl, %esi", + "rorl %cl, %esi, %esi", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::RotateRight, Some(5), w_r15), "49C1CF05", - "rorq $5, %r15", + "rorq $5, %r15, %r15", )); insns.push(( Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rsi), "40D2CE", - "rorb %cl, %sil", + "rorb %cl, %sil, %sil", )); insns.push(( Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rax), "D2C8", - "rorb %cl, %al", + "rorb %cl, %al, %al", )); insns.push(( Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, Some(5), w_r15), "41C0CF05", - "rorb $5, %r15b", + "rorb $5, %r15b, %r15b", )); insns.push(( Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, None, w_rsi), "66D3CE", - "rorw %cl, %si", + "rorw %cl, %si, %si", )); insns.push(( Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, Some(5), w_r15), "6641C1CF05", - "rorw $5, %r15w", + "rorw $5, %r15w, %r15w", )); // ======================================================== @@ -3319,7 +3338,7 @@ fn test_x64_emit() { insns.push(( Inst::cmove(OperandSize::Size16, CC::O, RegMem::reg(rdi), w_rsi), "660F40F7", - "cmovow %di, %si", + "cmovow %di, %si, %si", )); insns.push(( Inst::cmove( @@ -3334,12 +3353,12 @@ fn test_x64_emit() { w_r15, ), "66440F417CB725", - "cmovnow 37(%rdi,%rsi,4), %r15w", + "cmovnow 37(%rdi,%rsi,4), %r15w, %r15w", )); insns.push(( Inst::cmove(OperandSize::Size32, CC::LE, RegMem::reg(rdi), w_rsi), "0F4EF7", - "cmovlel %edi, %esi", + "cmovlel %edi, %esi, %esi", )); insns.push(( Inst::cmove( @@ -3349,12 +3368,12 @@ fn test_x64_emit() { w_rsi, ), "410F4F37", - "cmovnlel 0(%r15), %esi", + "cmovnlel 0(%r15), %esi, %esi", )); insns.push(( Inst::cmove(OperandSize::Size64, CC::Z, RegMem::reg(rdi), w_r14), "4C0F44F7", - "cmovzq %rdi, %r14", + "cmovzq %rdi, %r14, %r14", )); insns.push(( Inst::cmove( @@ -3364,7 +3383,7 @@ fn test_x64_emit() { w_r14, ), "4C0F45770D", - "cmovnzq 13(%rdi), %r14", + "cmovnzq 13(%rdi), %r14, %r14", )); // ======================================================== @@ -3513,7 +3532,7 @@ fn test_x64_emit() { // ======================================================== // Ret - insns.push((Inst::ret(), "C3", "ret")); + insns.push((Inst::ret(vec![]), "C3", "ret")); // ======================================================== // JmpKnown skipped for now @@ -3589,12 +3608,12 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0), "F30F58C1", - "addss %xmm1, %xmm0", + "addss %xmm0, %xmm1, %xmm0", )); 
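// Review note: with regalloc2 the tied read-modify-write operand is now
// printed explicitly, so two-operand SSE ops read `op src1, src2, dst`
// with src1 == dst. A minimal sketch of the round-trip, reusing the test
// helpers above (the binding names here are illustrative, not part of the PR):
let tied = Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0);
// With an empty allocation slice, registers print under their given names,
// yielding e.g. "addss %xmm0, %xmm1, %xmm0" (mnemonic padding elided):
// the first and last operand slots name the same tied register.
let _printed = tied.pretty_print_inst(&[], &mut Default::default());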
insns.push(( Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13), "F3450F58EB", - "addss %xmm11, %xmm13", + "addss %xmm13, %xmm11, %xmm13", )); insns.push(( Inst::xmm_rm_r( @@ -3608,23 +3627,23 @@ fn test_x64_emit() { w_xmm0, ), "F3410F5844927B", - "addss 123(%r10,%rdx,4), %xmm0", + "addss %xmm0, 123(%r10,%rdx,4), %xmm0", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4), "F2410F58E7", - "addsd %xmm15, %xmm4", + "addsd %xmm4, %xmm15, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1), "F30F5CC8", - "subss %xmm0, %xmm1", + "subss %xmm1, %xmm0, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), "F3410F5CCC", - "subss %xmm12, %xmm1", + "subss %xmm1, %xmm12, %xmm1", )); insns.push(( Inst::xmm_rm_r( @@ -3638,75 +3657,75 @@ fn test_x64_emit() { w_xmm10, ), "F3450F5C94C241010000", - "subss 321(%r10,%rax,8), %xmm10", + "subss %xmm10, 321(%r10,%rax,8), %xmm10", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14), "F2440F5CF5", - "subsd %xmm5, %xmm14", + "subsd %xmm14, %xmm5, %xmm14", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4), "F30F59E5", - "mulss %xmm5, %xmm4", + "mulss %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4), "F20F59E5", - "mulsd %xmm5, %xmm4", + "mulsd %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7), "F3410F5EF8", - "divss %xmm8, %xmm7", + "divss %xmm7, %xmm8, %xmm7", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4), "F20F5EE5", - "divsd %xmm5, %xmm4", + "divsd %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12), "440F54E3", - "andps %xmm3, %xmm12", + "andps %xmm12, %xmm3, %xmm12", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11), "440F55DC", - "andnps %xmm4, %xmm11", + "andnps %xmm11, %xmm4, %xmm11", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15), "440F56F9", - "orps %xmm1, %xmm15", + "orps %xmm15, %xmm1, %xmm15", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4), "0F56E5", - "orps %xmm5, %xmm4", + "orps %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Blendvpd, RegMem::reg(xmm15), w_xmm4), "66410F3815E7", - "blendvpd %xmm15, %xmm4", + "blendvpd %xmm4, %xmm15, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Blendvps, RegMem::reg(xmm2), w_xmm3), "660F3814DA", - "blendvps %xmm2, %xmm3", + "blendvps %xmm3, %xmm2, %xmm3", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pblendvb, RegMem::reg(xmm12), w_xmm13), "66450F3810EC", - "pblendvb %xmm12, %xmm13", + "pblendvb %xmm13, %xmm12, %xmm13", )); // ======================================================== @@ -3715,139 +3734,139 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5), "66410FFCE9", - "paddb %xmm9, %xmm5", + "paddb %xmm5, %xmm9, %xmm5", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6), "660FFDF7", - "paddw %xmm7, %xmm6", + "paddw %xmm6, %xmm7, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13), "66450FFEEC", - "paddd %xmm12, %xmm13", + "paddd %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8), "66440FD4C1", - "paddq %xmm1, %xmm8", + "paddq %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5), 
"66410FECE9", - "paddsb %xmm9, %xmm5", + "paddsb %xmm5, %xmm9, %xmm5", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6), "660FEDF7", - "paddsw %xmm7, %xmm6", + "paddsw %xmm6, %xmm7, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13), "66450FDCEC", - "paddusb %xmm12, %xmm13", + "paddusb %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8), "66440FDDC1", - "paddusw %xmm1, %xmm8", + "paddusw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5), "66410FE8E9", - "psubsb %xmm9, %xmm5", + "psubsb %xmm5, %xmm9, %xmm5", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6), "660FE9F7", - "psubsw %xmm7, %xmm6", + "psubsw %xmm6, %xmm7, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13), "66450FD8EC", - "psubusb %xmm12, %xmm13", + "psubusb %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8), "66440FD9C1", - "psubusw %xmm1, %xmm8", + "psubusw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13), "66450FE0EC", - "pavgb %xmm12, %xmm13", + "pavgb %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8), "66440FE3C1", - "pavgw %xmm1, %xmm8", + "pavgw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9), "66440FF8CD", - "psubb %xmm5, %xmm9", + "psubb %xmm9, %xmm5, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7), "660FF9FE", - "psubw %xmm6, %xmm7", + "psubw %xmm7, %xmm6, %xmm7", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12), "66450FFAE5", - "psubd %xmm13, %xmm12", + "psubd %xmm12, %xmm13, %xmm12", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1), "66410FFBC8", - "psubq %xmm8, %xmm1", + "psubq %xmm1, %xmm8, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmuldq, RegMem::reg(xmm4), w_xmm15), "66440F3828FC", - "pmuldq %xmm4, %xmm15", + "pmuldq %xmm15, %xmm4, %xmm15", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(xmm9), w_xmm1), "66410FE5C9", - "pmulhw %xmm9, %xmm1", + "pmulhw %xmm1, %xmm9, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmulhuw, RegMem::reg(xmm7), w_xmm9), "66440FE4CF", - "pmulhuw %xmm7, %xmm9", + "pmulhuw %xmm9, %xmm7, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6), "66410F3840F7", - "pmulld %xmm15, %xmm6", + "pmulld %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1), "66410FD5CE", - "pmullw %xmm14, %xmm1", + "pmullw %xmm1, %xmm14, %xmm1", )); insns.push(( @@ -3871,151 +3890,151 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9), "66450FF4C8", - "pmuludq %xmm8, %xmm9", + "pmuludq %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaddwd, RegMem::reg(xmm8), w_xmm1), "66410FF5C8", - "pmaddwd %xmm8, %xmm1", + "pmaddwd %xmm1, %xmm8, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6), "66410F383CF7", - "pmaxsb %xmm15, %xmm6", + "pmaxsb %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6), "66410FEEF7", - "pmaxsw %xmm15, %xmm6", + "pmaxsw %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), 
w_xmm6), "66410F383DF7", - "pmaxsd %xmm15, %xmm6", + "pmaxsd %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1), "66410FDECE", - "pmaxub %xmm14, %xmm1", + "pmaxub %xmm1, %xmm14, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1), "66410F383ECE", - "pmaxuw %xmm14, %xmm1", + "pmaxuw %xmm1, %xmm14, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1), "66410F383FCE", - "pmaxud %xmm14, %xmm1", + "pmaxud %xmm1, %xmm14, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9), "66450F3838C8", - "pminsb %xmm8, %xmm9", + "pminsb %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9), "66450FEAC8", - "pminsw %xmm8, %xmm9", + "pminsw %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9), "66450F3839C8", - "pminsd %xmm8, %xmm9", + "pminsd %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2), "660FDAD3", - "pminub %xmm3, %xmm2", + "pminub %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2), "660F383AD3", - "pminuw %xmm3, %xmm2", + "pminuw %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2), "660F383BD3", - "pminud %xmm3, %xmm2", + "pminud %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2), "66410FEFD3", - "pxor %xmm11, %xmm2", + "pxor %xmm2, %xmm11, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2), "66410F3800D3", - "pshufb %xmm11, %xmm2", + "pshufb %xmm2, %xmm11, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packssdw, RegMem::reg(xmm11), w_xmm12), "66450F6BE3", - "packssdw %xmm11, %xmm12", + "packssdw %xmm12, %xmm11, %xmm12", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packsswb, RegMem::reg(xmm11), w_xmm2), "66410F63D3", - "packsswb %xmm11, %xmm2", + "packsswb %xmm2, %xmm11, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packusdw, RegMem::reg(xmm13), w_xmm6), "66410F382BF5", - "packusdw %xmm13, %xmm6", + "packusdw %xmm6, %xmm13, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packuswb, RegMem::reg(xmm9), w_xmm4), "66410F67E1", - "packuswb %xmm9, %xmm4", + "packuswb %xmm4, %xmm9, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpckhbw, RegMem::reg(xmm3), w_xmm2), "660F68D3", - "punpckhbw %xmm3, %xmm2", + "punpckhbw %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpckhwd, RegMem::reg(xmm13), w_xmm2), "66410F69D5", - "punpckhwd %xmm13, %xmm2", + "punpckhwd %xmm2, %xmm13, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpcklbw, RegMem::reg(xmm1), w_xmm8), "66440F60C1", - "punpcklbw %xmm1, %xmm8", + "punpcklbw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpcklwd, RegMem::reg(xmm11), w_xmm8), "66450F61C3", - "punpcklwd %xmm11, %xmm8", + "punpcklwd %xmm8, %xmm11, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2), "410F14D3", - "unpcklps %xmm11, %xmm2", + "unpcklps %xmm2, %xmm11, %xmm2", )); // ======================================================== @@ -4023,19 +4042,19 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8), "440F5BC1", - "cvtdq2ps %xmm1, %xmm8", + "cvtdq2ps %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Cvttpd2dq, RegMem::reg(xmm15), w_xmm7), "66410FE6FF", - 
"cvttpd2dq %xmm15, %xmm7", + "cvttpd2dq %xmm7, %xmm15, %xmm7", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8), "F3450F5BC1", - "cvttps2dq %xmm9, %xmm8", + "cvttps2dq %xmm8, %xmm9, %xmm8", )); // XMM_Mov_R_M: float stores @@ -4347,17 +4366,17 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rmi_reg(SseOpcode::Psraw, RegMemImm::reg(xmm10), w_xmm1), "66410FE1CA", - "psraw %xmm10, %xmm1", + "psraw %xmm1, %xmm10, %xmm1", )); insns.push(( Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(31), w_xmm1), "660F72F11F", - "pslld $31, %xmm1", + "pslld %xmm1, $31, %xmm1", )); insns.push(( Inst::xmm_rmi_reg(SseOpcode::Psrlq, RegMemImm::imm(1), w_xmm3), "660F73D301", - "psrlq $1, %xmm3", + "psrlq %xmm3, $1, %xmm3", )); // ======================================================== @@ -4371,7 +4390,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "660FC2CD02", - "cmppd $2, %xmm5, %xmm1", + "cmppd $2, %xmm1, %xmm5, %xmm1", )); insns.push(( Inst::xmm_rm_r_imm( @@ -4382,7 +4401,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "410FC2FF00", - "cmpps $0, %xmm15, %xmm7", + "cmpps $0, %xmm7, %xmm15, %xmm7", )); insns.push(( Inst::xmm_rm_r_imm( @@ -4393,7 +4412,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "66440F3A0FC903", - "palignr $3, %xmm1, %xmm9", + "palignr $3, %xmm9, %xmm1, %xmm9", )); insns.push(( @@ -4405,7 +4424,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "440FC6D188", - "shufps $136, %xmm1, %xmm10", + "shufps $136, %xmm10, %xmm1, %xmm10", )); insns.push(( @@ -4477,7 +4496,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0410FB09C9241010000", - "lock cmpxchgb %bl, 321(%r10,%rdx,4)", + "lock cmpxchgb %bl, 321(%r10,%rdx,4), expected=%al, dst_old=%al", )); // Check redundant rex retention in 8-bit cases. insns.push(( @@ -4489,7 +4508,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F00FB094F1C7CFFFFF", - "lock cmpxchgb %dl, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %dl, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); insns.push(( Inst::LockCmpxchg { @@ -4500,7 +4519,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0400FB0B4F1C7CFFFFF", - "lock cmpxchgb %sil, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %sil, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); insns.push(( Inst::LockCmpxchg { @@ -4511,7 +4530,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0440FB094F1C7CFFFFF", - "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); insns.push(( Inst::LockCmpxchg { @@ -4522,7 +4541,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0440FB0BCF1C7CFFFFF", - "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); // 16 bit cases insns.push(( @@ -4534,7 +4553,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "66F00FB1B4F1C7CFFFFF", - "lock cmpxchgw %si, -12345(%rcx,%rsi,8)", + "lock cmpxchgw %si, -12345(%rcx,%rsi,8), expected=%ax, dst_old=%ax", )); insns.push(( Inst::LockCmpxchg { @@ -4545,7 +4564,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "66F0440FB194F1C7CFFFFF", - "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)", + "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8), expected=%ax, dst_old=%ax", )); // 32 bit cases insns.push(( @@ -4557,7 +4576,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F00FB1B4F1C7CFFFFF", - "lock cmpxchgl %esi, -12345(%rcx,%rsi,8)", + "lock cmpxchgl %esi, -12345(%rcx,%rsi,8), expected=%eax, dst_old=%eax", )); insns.push(( Inst::LockCmpxchg { @@ -4568,7 +4587,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0440FB194F1C7CFFFFF", - "lock cmpxchgl %r10d, 
-12345(%rcx,%rsi,8)", + "lock cmpxchgl %r10d, -12345(%rcx,%rsi,8), expected=%eax, dst_old=%eax", )); // 64 bit cases insns.push(( @@ -4580,7 +4599,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0480FB1B4F1C7CFFFFF", - "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)", + "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8), expected=%rax, dst_old=%rax", )); insns.push(( Inst::LockCmpxchg { @@ -4591,7 +4610,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F04C0FB194F1C7CFFFFF", - "lock cmpxchgq %r10, -12345(%rcx,%rsi,8)", + "lock cmpxchgq %r10, -12345(%rcx,%rsi,8), expected=%rax, dst_old=%rax", )); // AtomicRmwSeq @@ -4726,15 +4745,14 @@ fn test_x64_emit() { isa_flag_builder.enable("has_avx512vl").unwrap(); let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder); - let rru = regs::create_reg_universe_systemv(&flags); let emit_info = EmitInfo::new(flags, isa_flags); for (insn, expected_encoding, expected_printing) in insns { // Check the printed text is as expected. - let actual_printing = insn.show_rru(Some(&rru)); + let actual_printing = insn.pretty_print_inst(&[], &mut Default::default()); assert_eq!(expected_printing, actual_printing); let mut buffer = MachBuffer::new(); - insn.emit(&mut buffer, &emit_info, &mut Default::default()); + insn.emit(&[], &mut buffer, &emit_info, &mut Default::default()); // Allow one label just after the instruction (so the offset is 0). let label = buffer.get_label(); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9bf0fe2f04..1dd8d18ad6 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1,17 +1,15 @@ //! This module defines x86_64-specific machine instruction types. use crate::binemit::{Addend, CodeOffset, Reloc, StackMap}; -use crate::ir::{types, ExternalName, Opcode, SourceLoc, TrapCode, Type, ValueLabel}; +use crate::ir::{types, ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::isa::x64::abi::X64ABIMachineSpec; +use crate::isa::x64::inst::regs::pretty_print_reg; use crate::isa::x64::settings as x64_settings; use crate::isa::CallConv; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; use alloc::vec::Vec; -use regalloc::{ - PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, SpillSlot, - VirtualReg, Writable, -}; +use regalloc2::{Allocation, VReg}; use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::{String, ToString}; @@ -24,7 +22,6 @@ pub mod regs; pub mod unwind; use args::*; -use regs::show_ireg_sized; //============================================================================= // Instructions (top level): definition @@ -80,7 +77,7 @@ impl Inst { | Inst::Nop { .. } | Inst::Pop64 { .. } | Inst::Push64 { .. } - | Inst::Ret + | Inst::Ret { .. } | Inst::Setcc { .. } | Inst::ShiftR { .. } | Inst::SignExtendData { .. } @@ -94,8 +91,8 @@ impl Inst { | Inst::XmmUninitializedValue { .. } | Inst::ElfTlsGetAddr { .. } | Inst::MachOTlsGetAddr { .. } - | Inst::ValueLabelMarker { .. } - | Inst::Unwind { .. } => smallvec![], + | Inst::Unwind { .. } + | Inst::DummyUse { .. } => smallvec![], Inst::UnaryRmR { op, .. 
} => op.available_from(), @@ -128,8 +125,8 @@ impl Inst { dst: Writable, ) -> Self { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); Self::AluRmiR { size, op, @@ -146,8 +143,8 @@ impl Inst { src: RegMem, dst: Writable, ) -> Self { - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); debug_assert!(size.is_one_of(&[ OperandSize::Size16, OperandSize::Size32, @@ -162,7 +159,7 @@ impl Inst { } pub(crate) fn not(size: OperandSize, src: Writable) -> Inst { - debug_assert_eq!(src.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(src.to_reg().class(), RegClass::Int); Inst::Not { size, src: Gpr::new(src.to_reg()).unwrap(), @@ -171,12 +168,13 @@ impl Inst { } pub(crate) fn div(size: OperandSize, signed: bool, divisor: RegMem) -> Inst { - divisor.assert_regclass_is(RegClass::I64); + divisor.assert_regclass_is(RegClass::Int); Inst::Div { size, signed, divisor: GprMem::new(divisor).unwrap(), - dividend: Gpr::new(regs::rax()).unwrap(), + dividend_lo: Gpr::new(regs::rax()).unwrap(), + dividend_hi: Gpr::new(regs::rdx()).unwrap(), dst_quotient: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), } @@ -188,7 +186,7 @@ impl Inst { OperandSize::Size32, OperandSize::Size64 ])); - rhs.assert_regclass_is(RegClass::I64); + rhs.assert_regclass_is(RegClass::Int); Inst::MulHi { size, signed, @@ -205,15 +203,16 @@ impl Inst { divisor: Writable, tmp: Option>, ) -> Inst { - debug_assert!(divisor.to_reg().get_class() == RegClass::I64); + debug_assert!(divisor.to_reg().class() == RegClass::Int); debug_assert!(tmp - .map(|tmp| tmp.to_reg().get_class() == RegClass::I64) + .map(|tmp| tmp.to_reg().class() == RegClass::Int) .unwrap_or(true)); Inst::CheckedDivOrRemSeq { kind, size, divisor: WritableGpr::from_writable_reg(divisor).unwrap(), - dividend: Gpr::new(regs::rax()).unwrap(), + dividend_lo: Gpr::new(regs::rax()).unwrap(), + dividend_hi: Gpr::new(regs::rdx()).unwrap(), dst_quotient: Writable::from_reg(Gpr::new(regs::rax()).unwrap()), dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), tmp: tmp.map(|tmp| WritableGpr::from_writable_reg(tmp).unwrap()), @@ -230,7 +229,7 @@ impl Inst { pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable) -> Inst { debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); // Try to generate a 32-bit immediate when the upper high bits are zeroed (which matches // the semantics of movl). 
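// (Writing a 32-bit register on x86-64 implicitly zero-extends into the full
// 64-bit register, so e.g. simm64 = 0x8000_0000 can be materialized with the
// short `movl` encoding rather than the 10-byte `movabsq`; only values with
// any of the upper 32 bits set need the 64-bit form.)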
let dst_size = match dst_size { @@ -246,8 +245,8 @@ impl Inst { pub(crate) fn mov_r_r(size: OperandSize, src: Reg, dst: Writable) -> Inst { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); let src = Gpr::new(src).unwrap(); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::MovRR { size, src, dst } @@ -255,8 +254,8 @@ impl Inst { // TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level) pub(crate) fn xmm_mov(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUnaryRmR { op, src: XmmMem::new(src).unwrap(), @@ -265,15 +264,15 @@ impl Inst { } pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable, ty: Type) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(dst.to_reg().class() == RegClass::Float); debug_assert!(ty.is_vector() && ty.bits() == 128); Inst::XmmLoadConst { src, dst, ty } } /// Convenient helper for unary float operations. pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUnaryRmR { op, src: XmmMem::new(src).unwrap(), @@ -282,8 +281,8 @@ impl Inst { } pub(crate) fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUnaryRmREvex { op, src: XmmMem::new(src).unwrap(), @@ -292,8 +291,8 @@ impl Inst { } pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Self { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmRmR { op, src1: Xmm::new(dst.to_reg()).unwrap(), @@ -308,9 +307,9 @@ impl Inst { src2: Reg, dst: Writable, ) -> Self { - src1.assert_regclass_is(RegClass::V128); - debug_assert!(src2.get_class() == RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src1.assert_regclass_is(RegClass::Float); + debug_assert!(src2.class() == RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmRmREvex { op, src1: XmmMem::new(src1).unwrap(), @@ -320,14 +319,14 @@ impl Inst { } pub(crate) fn xmm_uninit_value(dst: Writable) -> Self { - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUninitializedValue { dst: WritableXmm::from_writable_reg(dst).unwrap(), } } pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into) -> Inst { - debug_assert!(src.get_class() == RegClass::V128); + debug_assert!(src.class() == RegClass::Float); Inst::XmmMovRM { op, src, @@ -341,8 +340,8 @@ impl Inst { dst: Writable, dst_size: OperandSize, ) -> Inst { - debug_assert!(src.get_class() == 
RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.class() == RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Int); debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); Inst::XmmToGpr { op, @@ -358,9 +357,9 @@ impl Inst { src_size: OperandSize, dst: Writable, ) -> Inst { - src.assert_regclass_is(RegClass::I64); + src.assert_regclass_is(RegClass::Int); debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::GprToXmm { op, src: GprMem::new(src).unwrap(), @@ -370,8 +369,8 @@ impl Inst { } pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.class() == RegClass::Float); let src = XmmMem::new(src).unwrap(); let dst = Xmm::new(dst).unwrap(); Inst::XmmCmpRmR { op, src, dst } @@ -385,10 +384,10 @@ impl Inst { dst: Writable, ) -> Inst { debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.to_reg().get_class() == RegClass::I64); - debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64); - debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(src.to_reg().class() == RegClass::Int); + debug_assert!(tmp_gpr1.to_reg().class() == RegClass::Int); + debug_assert!(tmp_gpr2.to_reg().class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::CvtUint64ToFloatSeq { src: WritableGpr::from_writable_reg(src).unwrap(), dst: WritableXmm::from_writable_reg(dst).unwrap(), @@ -409,10 +408,10 @@ impl Inst { ) -> Inst { debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.to_reg().class() == RegClass::Float); + debug_assert!(tmp_xmm.to_reg().class() == RegClass::Float); + debug_assert!(tmp_gpr.to_reg().class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::CvtFloatToSintSeq { src_size, dst_size, @@ -435,10 +434,10 @@ impl Inst { ) -> Inst { debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.to_reg().class() == RegClass::Float); + debug_assert!(tmp_xmm.to_reg().class() == RegClass::Float); + debug_assert!(tmp_gpr.to_reg().class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::CvtFloatToUintSeq { src_size, dst_size, @@ -459,9 +458,9 @@ impl Inst { dst: Writable, ) -> Inst { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert_eq!(lhs.get_class(), RegClass::V128); - 
debug_assert_eq!(rhs.get_class(), RegClass::V128); - debug_assert_eq!(dst.to_reg().get_class(), RegClass::V128); + debug_assert_eq!(lhs.class(), RegClass::Float); + debug_assert_eq!(rhs.class(), RegClass::Float); + debug_assert_eq!(dst.to_reg().class(), RegClass::Float); Inst::XmmMinMaxSeq { size, is_min, @@ -490,16 +489,16 @@ impl Inst { } pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); let src = GprMem::new(src).unwrap(); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::MovzxRmR { ext_mode, src, dst } } pub(crate) fn xmm_rmi_reg(opcode: SseOpcode, src: RegMemImm, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmRmiReg { opcode, src1: Xmm::new(dst.to_reg()).unwrap(), @@ -509,15 +508,15 @@ impl Inst { } pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); let src = GprMem::new(src).unwrap(); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::MovsxRmR { ext_mode, src, dst } } pub(crate) fn mov64_m_r(src: impl Into, dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::Mov64MR { src: src.into(), dst: WritableGpr::from_writable_reg(dst).unwrap(), @@ -526,7 +525,7 @@ impl Inst { /// A convenience function to be able to use a RegMem as the source of a move. pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::I64); + src.assert_regclass_is(RegClass::Int); match src { RegMem::Reg { reg } => Self::mov_r_r(OperandSize::Size64, reg, dst), RegMem::Mem { addr } => Self::mov64_m_r(addr, dst), @@ -534,7 +533,7 @@ impl Inst { } pub(crate) fn mov_r_m(size: OperandSize, src: Reg, dst: impl Into) -> Inst { - debug_assert!(src.get_class() == RegClass::I64); + debug_assert!(src.class() == RegClass::Int); Inst::MovRM { size, src: Gpr::new(src).unwrap(), @@ -543,7 +542,7 @@ impl Inst { } pub(crate) fn lea(addr: impl Into, dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::LoadEffectiveAddress { addr: addr.into(), dst: WritableGpr::from_writable_reg(dst).unwrap(), @@ -561,7 +560,7 @@ impl Inst { } else { true }); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::ShiftR { size, kind, @@ -578,8 +577,8 @@ impl Inst { /// Does a comparison of dst - src for operands of size `size`, as stated by the machine /// instruction semantics. Be careful with the order of parameters! 
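/// In AT&T terms this is `cmp src, dst`; for example, assuming the usual
/// printing conventions, `cmp_rmi_r(OperandSize::Size64, RegMemImm::imm(0),
/// regs::rax())` renders as `cmpq $0, %rax` and sets flags from `%rax - 0`.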
pub(crate) fn cmp_rmi_r(size: OperandSize, src: RegMemImm, dst: Reg) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert_eq!(dst.get_class(), RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert_eq!(dst.class(), RegClass::Int); Inst::CmpRmiR { size, src: GprMemImm::new(src).unwrap(), @@ -590,8 +589,8 @@ impl Inst { /// Does a comparison of dst & src for operands of size `size`. pub(crate) fn test_rmi_r(size: OperandSize, src: RegMemImm, dst: Reg) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert_eq!(dst.get_class(), RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert_eq!(dst.class(), RegClass::Int); Inst::CmpRmiR { size, src: GprMemImm::new(src).unwrap(), @@ -607,7 +606,7 @@ impl Inst { } pub(crate) fn setcc(cc: CC, dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::Setcc { cc, dst } } @@ -618,7 +617,7 @@ impl Inst { OperandSize::Size32, OperandSize::Size64 ])); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::Cmove { size, cc, @@ -630,8 +629,8 @@ impl Inst { pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable) -> Inst { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); let src = XmmMem::new(src).unwrap(); let dst = WritableXmm::from_writable_reg(dst).unwrap(); Inst::XmmCmove { @@ -644,13 +643,13 @@ impl Inst { } pub(crate) fn push64(src: RegMemImm) -> Inst { - src.assert_regclass_is(RegClass::I64); + src.assert_regclass_is(RegClass::Int); let src = GprMemImm::new(src).unwrap(); Inst::Push64 { src } } pub(crate) fn pop64(dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::Pop64 { dst } } @@ -675,7 +674,7 @@ impl Inst { defs: Vec>, opcode: Opcode, ) -> Inst { - dest.assert_regclass_is(RegClass::I64); + dest.assert_regclass_is(RegClass::Int); Inst::CallUnknown { dest, uses, @@ -684,8 +683,8 @@ impl Inst { } } - pub(crate) fn ret() -> Inst { - Inst::Ret + pub(crate) fn ret(rets: Vec) -> Inst { + Inst::Ret { rets } } pub(crate) fn epilogue_placeholder() -> Inst { @@ -709,7 +708,7 @@ impl Inst { } pub(crate) fn jmp_unknown(target: RegMem) -> Inst { - target.assert_regclass_is(RegClass::I64); + target.assert_regclass_is(RegClass::Int); Inst::JmpUnknown { target } } @@ -726,9 +725,9 @@ impl Inst { to_reg: Writable, ext_kind: ExtKind, ) -> Inst { - let rc = to_reg.to_reg().get_class(); + let rc = to_reg.to_reg().class(); match rc { - RegClass::I64 => { + RegClass::Int => { let ext_mode = match ty.bytes() { 1 => Some(ExtMode::BQ), 2 => Some(ExtMode::WQ), @@ -755,7 +754,7 @@ impl Inst { Inst::mov64_m_r(from_addr, to_reg) } } - RegClass::V128 => { + RegClass::Float => { let opcode = match ty { types::F32 => SseOpcode::Movss, types::F64 => SseOpcode::Movsd, @@ -766,16 +765,15 @@ impl Inst { }; Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg) } - _ => panic!("unable to generate load for register class: {:?}", rc), } } /// Choose which instruction to use for storing a register value to memory. 
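/// Dispatches on the class of `from_reg`: integer registers use a plain
/// `mov` of the type's width, while float/vector registers use the SSE move
/// appropriate to the type (`movss`/`movsd` for scalar floats, a full
/// 128-bit move for vectors).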
pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into) -> Inst { - let rc = from_reg.get_class(); + let rc = from_reg.class(); match rc { - RegClass::I64 => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr), - RegClass::V128 => { + RegClass::Int => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr), + RegClass::Float => { let opcode = match ty { types::F32 => SseOpcode::Movss, types::F64 => SseOpcode::Movsd, @@ -786,7 +784,6 @@ impl Inst { }; Inst::xmm_mov_r_m(opcode, from_reg, to_addr) } - _ => panic!("unable to generate store for register class: {:?}", rc), } } } @@ -831,342 +828,13 @@ impl Inst { _ => false, } } - - /// Translate three-operand instructions into a sequence of two-operand - /// instructions. - /// - /// For example: - /// - /// ```text - /// x = add a, b - /// ``` - /// - /// Becomes: - /// - /// ```text - /// mov x, a - /// add x, b - /// ``` - /// - /// The three-operand form for instructions allows our ISLE DSL code to have - /// a value-based, SSA view of the world. This method is responsible for - /// undoing that. - /// - /// Note that register allocation cleans up most of these inserted `mov`s - /// with its move coalescing. - pub(crate) fn mov_mitosis(mut self) -> impl Iterator { - log::trace!("mov_mitosis({:?})", self); - - let mut insts = SmallVec::<[Self; 4]>::new(); - - match &mut self { - Inst::AluRmiR { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src1.to_reg(), - types::I64, - )); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmRmiReg { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src1.to_reg(), - types::I8X16, - )); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmRmR { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src1.to_reg(), - types::I8X16, - )); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmRmRImm { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move(*dst, *src1, types::I8X16)); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmMinMaxSeq { rhs, dst, .. } => { - if *rhs != dst.to_reg() { - debug_assert!(rhs.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - rhs.to_reg(), - types::I8X16, - )); - *rhs = dst.to_reg(); - } - insts.push(self); - } - Inst::Cmove { - size, - alternative, - dst, - .. - } => { - if *alternative != dst.to_reg() { - debug_assert!(alternative.is_virtual()); - insts.push(Self::mov_r_r( - *size, - alternative.to_reg(), - dst.to_writable_reg(), - )); - *alternative = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmCmove { - alternative, dst, .. - } => { - if *alternative != dst.to_reg() { - debug_assert!(alternative.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - alternative.to_reg(), - types::F32X4, - )); - *alternative = dst.to_reg(); - } - insts.push(self); - } - Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => { - if *src != dst.to_reg() { - debug_assert!(src.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src.to_reg(), - types::I64, - )); - *src = dst.to_reg(); - } - insts.push(self); - } - Inst::Div { - dividend, - dst_quotient, - dst_remainder, - .. 
- } - | Inst::CheckedDivOrRemSeq { - dividend, - dst_quotient, - dst_remainder, - .. - } => { - if *dividend != regs::rax() { - debug_assert!(dividend.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - dividend.to_reg(), - types::I64, - )); - *dividend = Gpr::new(regs::rax()).unwrap(); - } - let mut quotient_mov = None; - if dst_quotient.to_reg() != regs::rax() { - debug_assert!(dst_quotient.to_reg().is_virtual()); - quotient_mov = Some(Self::gen_move( - dst_quotient.to_writable_reg(), - regs::rax(), - types::I64, - )); - *dst_quotient = Writable::from_reg(Gpr::new(regs::rax()).unwrap()); - } - let mut remainder_mov = None; - if dst_remainder.to_reg() != regs::rdx() { - debug_assert!(dst_remainder.to_reg().is_virtual()); - remainder_mov = Some(Self::gen_move( - dst_remainder.to_writable_reg(), - regs::rdx(), - types::I64, - )); - *dst_remainder = Writable::from_reg(Gpr::new(regs::rdx()).unwrap()); - } - insts.push(self); - insts.extend(quotient_mov); - insts.extend(remainder_mov); - } - Inst::MulHi { - src1, - dst_lo, - dst_hi, - .. - } => { - if *src1 != regs::rax() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - src1.to_reg(), - types::I64, - )); - *src1 = Gpr::new(regs::rax()).unwrap(); - } - let mut dst_lo_mov = None; - if dst_lo.to_reg() != regs::rax() { - debug_assert!(dst_lo.to_reg().is_virtual()); - dst_lo_mov = Some(Self::gen_move( - dst_lo.to_writable_reg(), - regs::rax(), - types::I64, - )); - *dst_lo = Writable::from_reg(Gpr::new(regs::rax()).unwrap()); - } - let mut dst_hi_mov = None; - if dst_hi.to_reg() != regs::rdx() { - debug_assert!(dst_hi.to_reg().is_virtual()); - dst_hi_mov = Some(Self::gen_move( - dst_hi.to_writable_reg(), - regs::rdx(), - types::I64, - )); - *dst_hi = Writable::from_reg(Gpr::new(regs::rdx()).unwrap()); - } - insts.push(self); - insts.extend(dst_lo_mov); - insts.extend(dst_hi_mov); - } - Inst::SignExtendData { src, dst, .. } => { - if *src != regs::rax() { - debug_assert!(src.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - src.to_reg(), - types::I64, - )); - *src = Gpr::new(regs::rax()).unwrap(); - } - let mut dst_mov = None; - if dst.to_reg() != regs::rax() { - debug_assert!(dst.to_reg().is_virtual()); - dst_mov = Some(Self::gen_move( - dst.to_writable_reg(), - dst.to_reg().to_reg(), - types::I64, - )); - *dst = Writable::from_reg(Gpr::new(regs::rax()).unwrap()); - } - insts.push(self); - insts.extend(dst_mov); - } - Inst::ShiftR { - src, num_bits, dst, .. - } => { - if *src != dst.to_reg() { - debug_assert!(src.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src.to_reg(), - types::I64, - )); - *src = dst.to_reg(); - } - if let Imm8Reg::Reg { reg } = num_bits.clone().to_imm8_reg() { - if reg != regs::rcx() { - debug_assert!(reg.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rcx()), - reg, - types::I64, - )); - *num_bits = Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(); - } - } - insts.push(self); - } - Inst::LockCmpxchg { - ty, - expected, - dst_old, - .. 
- } => { - if *expected != regs::rax() { - debug_assert!(expected.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - *expected, - *ty, - )); - } - let mut dst_old_mov = None; - if dst_old.to_reg() != regs::rax() { - debug_assert!(dst_old.to_reg().is_virtual()); - dst_old_mov = Some(Self::gen_move(*dst_old, regs::rax(), *ty)); - *dst_old = Writable::from_reg(regs::rax()); - } - insts.push(self); - insts.extend(dst_old_mov); - } - Inst::AtomicRmwSeq { - ty, - address, - operand, - dst_old, - .. - } => { - if *address != regs::r9() { - debug_assert!(address.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::r9()), - *address, - types::I64, - )); - *address = regs::r9(); - } - if *operand != regs::r10() { - debug_assert!(operand.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::r10()), - *operand, - *ty, - )); - *address = regs::r10(); - } - let mut dst_old_mov = None; - if dst_old.to_reg() != regs::rax() { - debug_assert!(dst_old.to_reg().is_virtual()); - dst_old_mov = Some(Self::gen_move(*dst_old, regs::rax(), *ty)); - *dst_old = Writable::from_reg(regs::rax()); - } - insts.push(self); - insts.extend(dst_old_mov); - } - // No other instruction needs 3-operand to 2-operand legalization. - _ => insts.push(self), - } - - if log::log_enabled!(log::Level::Trace) { - for inst in &insts { - log::trace!(" -> {:?}", inst); - } - } - - insts.into_iter() - } } //============================================================================= // Instructions: printing impl PrettyPrint for Inst { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String { fn ljustify(s: String) -> String { let w = 7; if s.len() >= w { @@ -1219,116 +887,214 @@ impl PrettyPrint for Inst { match self { Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len), + Inst::AluRmiR { size, op, dst, .. 
} if self.produces_const() => { + let dst = + pretty_print_reg(dst.to_reg().to_reg(), size_lqb(*size, op.is_8bit()), allocs); + format!( + "{} {}, {}, {}", + ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + dst, + dst, + dst + ) + } Inst::AluRmiR { size, op, - src1: _, + src1, src2, dst, - } => format!( - "{} {}, {}", - ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), - src2.show_rru_sized(mb_rru, size_lqb(*size, op.is_8bit())), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size_lqb(*size, op.is_8bit())), - ), + } => { + let size_bytes = size_lqb(*size, op.is_8bit()); + let src1 = pretty_print_reg(src1.to_reg(), size_bytes, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size_bytes, allocs); + let src2 = src2.pretty_print(size_bytes, allocs); + format!( + "{} {}, {}, {}", + ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + src1, + src2, + dst + ) + } + Inst::UnaryRmR { src, dst, op, size } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + let src = src.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2(op.to_string(), suffix_bwlq(*size)), + src, + dst, + ) + } - Inst::UnaryRmR { src, dst, op, size } => format!( - "{} {}, {}", - ljustify2(op.to_string(), suffix_bwlq(*size)), - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()), - ), + Inst::Not { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("not".to_string(), suffix_bwlq(*size)), + src, + dst, + ) + } - Inst::Not { size, src: _, dst } => format!( - "{} {}", - ljustify2("not".to_string(), suffix_bwlq(*size)), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), - - Inst::Neg { size, src: _, dst } => format!( - "{} {}", - ljustify2("neg".to_string(), suffix_bwlq(*size)), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + Inst::Neg { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("neg".to_string(), suffix_bwlq(*size)), + src, + dst, + ) + } Inst::Div { size, signed, divisor, - .. - } => format!( - "{} {}", - ljustify(if *signed { - "idiv".to_string() - } else { - "div".into() - }), - divisor.show_rru_sized(mb_rru, size.to_bytes()) - ), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + } => { + let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); + let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); + let dst_quotient = + pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_remainder = + pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); + let divisor = divisor.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}, {}, {}, {}", + ljustify(if *signed { + "idiv".to_string() + } else { + "div".into() + }), + dividend_lo, + dividend_hi, + divisor, + dst_quotient, + dst_remainder, + ) + } Inst::MulHi { - size, signed, src2, .. 
- } => format!( - "{} {}", - ljustify(if *signed { - "imul".to_string() - } else { - "mul".to_string() - }), - src2.show_rru_sized(mb_rru, size.to_bytes()) - ), + size, + signed, + src1, + src2, + dst_lo, + dst_hi, + } => { + let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes(), allocs); + let dst_lo = pretty_print_reg(dst_lo.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_hi = pretty_print_reg(dst_hi.to_reg().to_reg(), size.to_bytes(), allocs); + let src2 = src2.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(if *signed { + "imul".to_string() + } else { + "mul".to_string() + }), + src1, + src2, + dst_lo, + dst_hi, + ) + } Inst::CheckedDivOrRemSeq { kind, size, divisor, - .. - } => format!( - "{} $rax:$rdx, {}", - match kind { - DivOrRemKind::SignedDiv => "sdiv", - DivOrRemKind::UnsignedDiv => "udiv", - DivOrRemKind::SignedRem => "srem", - DivOrRemKind::UnsignedRem => "urem", - }, - show_ireg_sized(divisor.to_reg().to_reg(), mb_rru, size.to_bytes()), - ), - - Inst::SignExtendData { size, .. } => match size { - OperandSize::Size8 => "cbw", - OperandSize::Size16 => "cwd", - OperandSize::Size32 => "cdq", - OperandSize::Size64 => "cqo", + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + tmp, + } => { + let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); + let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); + let divisor = pretty_print_reg(divisor.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_quotient = + pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_remainder = + pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); + let tmp = tmp + .map(|tmp| pretty_print_reg(tmp.to_reg().to_reg(), size.to_bytes(), allocs)) + .unwrap_or("(none)".to_string()); + format!( + "{} {}, {}, {}, {}, {}, tmp={}", + match kind { + DivOrRemKind::SignedDiv => "sdiv_seq", + DivOrRemKind::UnsignedDiv => "udiv_seq", + DivOrRemKind::SignedRem => "srem_seq", + DivOrRemKind::UnsignedRem => "urem_seq", + }, + dividend_lo, + dividend_hi, + divisor, + dst_quotient, + dst_remainder, + tmp, + ) } - .into(), - Inst::XmmUnaryRmR { op, src, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, op.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + Inst::SignExtendData { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + match size { + OperandSize::Size8 => "cbw", + OperandSize::Size16 => "cwd", + OperandSize::Size32 => "cdq", + OperandSize::Size64 => "cqo", + }, + src, + dst, + ) + } - Inst::XmmUnaryRmREvex { op, src, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + Inst::XmmUnaryRmR { op, src, dst, .. } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size(), allocs); + let src = src.pretty_print(op.src_size(), allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } - Inst::XmmMovRM { op, src, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - show_ireg_sized(*src, mb_rru, 8), - dst.show_rru(mb_rru), - ), + Inst::XmmUnaryRmREvex { op, src, dst, .. 
} => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } - Inst::XmmRmR { op, src2, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src2.show_rru_sized(mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + Inst::XmmMovRM { op, src, dst, .. } => { + let src = pretty_print_reg(*src, 8, allocs); + let dst = dst.pretty_print(8, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } + + Inst::XmmRmR { op, dst, .. } if self.produces_const() => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}, {}, {}", ljustify(op.to_string()), dst, dst, dst) + } + + Inst::XmmRmR { + op, + src1, + src2, + dst, + .. + } => { + let src1 = pretty_print_reg(src1.to_reg(), 8, allocs); + let src2 = src2.pretty_print(8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst) + } Inst::XmmRmREvex { op, @@ -1336,13 +1102,12 @@ impl PrettyPrint for Inst { src2, dst, .. - } => format!( - "{} {}, {}, {}", - ljustify(op.to_string()), - src1.show_rru_sized(mb_rru, 8), - show_ireg_sized(src2.to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + } => { + let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src1 = src1.pretty_print(8, allocs); + format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst) + } Inst::XmmMinMaxSeq { lhs, @@ -1350,50 +1115,90 @@ impl PrettyPrint for Inst { dst, is_min, size, - } => format!( - "{} {}, {}, {}", - ljustify2( - if *is_min { - "xmm min seq ".to_string() - } else { - "xmm max seq ".to_string() - }, - format!("f{}", size.to_bits()) - ), - show_ireg_sized(lhs.to_reg(), mb_rru, 8), - show_ireg_sized(rhs.to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + } => { + let rhs = pretty_print_reg(rhs.to_reg(), 8, allocs); + let lhs = pretty_print_reg(lhs.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}", + ljustify2( + if *is_min { + "xmm min seq ".to_string() + } else { + "xmm max seq ".to_string() + }, + format!("f{}", size.to_bits()) + ), + lhs, + rhs, + dst + ) + } + + Inst::XmmRmRImm { + op, dst, imm, size, .. + } if self.produces_const() => { + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + format!( + "{} ${}, {}, {}, {}", + ljustify(format!( + "{}{}", + op.to_string(), + if *size == OperandSize::Size64 { + ".w" + } else { + "" + } + )), + imm, + dst, + dst, + dst, + ) + } Inst::XmmRmRImm { op, + src1, src2, dst, imm, size, .. 
- } => format!( - "{} ${}, {}, {}", - ljustify(format!( - "{}{}", - op.to_string(), - if *size == OperandSize::Size64 { - ".w" - } else { - "" - } - )), - imm, - src2.show_rru(mb_rru), - dst.show_rru(mb_rru), - ), + } => { + let src1 = if op.uses_src1() { + pretty_print_reg(*src1, 8, allocs) + ", " + } else { + "".into() + }; + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + let src2 = src2.pretty_print(8, allocs); + format!( + "{} ${}, {}{}, {}", + ljustify(format!( + "{}{}", + op.to_string(), + if *size == OperandSize::Size64 { + ".w" + } else { + "" + } + )), + imm, + src1, + src2, + dst, + ) + } Inst::XmmUninitializedValue { dst } => { - format!("{} {}", ljustify("uninit".into()), dst.show_rru(mb_rru),) + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}", ljustify("uninit".into()), dst) } Inst::XmmLoadConst { src, dst, .. } => { - format!("load_const {:?}, {}", src, dst.show_rru(mb_rru),) + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + format!("load_const {:?}, {}", src, dst) } Inst::XmmToGpr { @@ -1403,12 +1208,9 @@ impl PrettyPrint for Inst { dst_size, } => { let dst_size = dst_size.to_bytes(); - format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru(mb_rru), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, dst_size), - ) + let src = pretty_print_reg(src.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) } Inst::GprToXmm { @@ -1416,179 +1218,243 @@ impl PrettyPrint for Inst { src, src_size, dst, - } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, src_size.to_bytes()), - dst.show_rru(mb_rru) - ), + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(src_size.to_bytes(), allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } - Inst::XmmCmpRmR { op, src, dst } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, 8), - show_ireg_sized(dst.to_reg(), mb_rru, 8), - ), + Inst::XmmCmpRmR { op, src, dst } => { + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } Inst::CvtUint64ToFloatSeq { - src, dst, dst_size, .. - } => format!( - "{} {}, {}", - ljustify(format!( - "u64_to_{}_seq", - if *dst_size == OperandSize::Size64 { - "f64" - } else { - "f32" - } - )), - show_ireg_sized(src.to_reg().to_reg(), mb_rru, 8), - dst.show_rru(mb_rru), - ), + src, + dst, + dst_size, + tmp_gpr1, + tmp_gpr2, + .. + } => { + let src = pretty_print_reg(src.to_reg().to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); + let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8, allocs); + let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(format!( + "u64_to_{}_seq", + if *dst_size == OperandSize::Size64 { + "f64" + } else { + "f32" + } + )), + src, + dst, + tmp_gpr1, + tmp_gpr2 + ) + } Inst::CvtFloatToSintSeq { src, dst, src_size, dst_size, + tmp_xmm, + tmp_gpr, .. 
- } => format!( - "{} {}, {}", - ljustify(format!( - "cvt_float{}_to_sint{}_seq", - src_size.to_bits(), - dst_size.to_bits() - )), - show_ireg_sized(src.to_reg().to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, dst_size.to_bytes()), - ), + } => { + let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); + let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); + let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(format!( + "cvt_float{}_to_sint{}_seq", + src_size.to_bits(), + dst_size.to_bits() + )), + src, + dst, + tmp_gpr, + tmp_xmm, + ) + } Inst::CvtFloatToUintSeq { src, dst, src_size, dst_size, + tmp_gpr, + tmp_xmm, .. - } => format!( - "{} {}, {}", - ljustify(format!( - "cvt_float{}_to_uint{}_seq", - src_size.to_bits(), - dst_size.to_bits() - )), - show_ireg_sized(src.to_reg().to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, dst_size.to_bytes()), - ), + } => { + let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); + let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); + let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(format!( + "cvt_float{}_to_uint{}_seq", + src_size.to_bits(), + dst_size.to_bits() + )), + src, + dst, + tmp_gpr, + tmp_xmm, + ) + } Inst::Imm { dst_size, simm64, dst, } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); if *dst_size == OperandSize::Size64 { format!( "{} ${}, {}", ljustify("movabsq".to_string()), *simm64 as i64, - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8) + dst, ) } else { format!( "{} ${}, {}", ljustify("movl".to_string()), (*simm64 as u32) as i32, - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 4) + dst, ) } } - Inst::MovRR { size, src, dst } => format!( - "{} {}, {}", - ljustify2("mov".to_string(), suffix_lq(*size)), - show_ireg_sized(src.to_reg(), mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + Inst::MovRR { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("mov".to_string(), suffix_lq(*size)), + src, + dst + ) + } Inst::MovzxRmR { ext_mode, src, dst, .. } => { + let dst_size = if *ext_mode == ExtMode::LQ { + 4 + } else { + ext_mode.dst_size() + }; + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size, allocs); + let src = src.pretty_print(ext_mode.src_size(), allocs); if *ext_mode == ExtMode::LQ { - format!( - "{} {}, {}", - ljustify("movl".to_string()), - src.show_rru_sized(mb_rru, ext_mode.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 4) - ) + format!("{} {}, {}", ljustify("movl".to_string()), src, dst) } else { format!( "{} {}, {}", ljustify2("movz".to_string(), ext_mode.to_string()), - src.show_rru_sized(mb_rru, ext_mode.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, ext_mode.dst_size()) + src, + dst, ) } } - Inst::Mov64MR { src, dst, .. } => format!( - "{} {}, {}", - ljustify("movq".to_string()), - src.show_rru(mb_rru), - dst.show_rru(mb_rru) - ), + Inst::Mov64MR { src, dst, .. 
} => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} {}, {}", ljustify("movq".to_string()), src, dst) + } - Inst::LoadEffectiveAddress { addr, dst } => format!( - "{} {}, {}", - ljustify("lea".to_string()), - addr.show_rru(mb_rru), - dst.show_rru(mb_rru) - ), + Inst::LoadEffectiveAddress { addr, dst } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let addr = addr.pretty_print(8, allocs); + format!("{} {}, {}", ljustify("lea".to_string()), addr, dst) + } Inst::MovsxRmR { ext_mode, src, dst, .. - } => format!( - "{} {}, {}", - ljustify2("movs".to_string(), ext_mode.to_string()), - src.show_rru_sized(mb_rru, ext_mode.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, ext_mode.dst_size()) - ), + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), ext_mode.dst_size(), allocs); + let src = src.pretty_print(ext_mode.src_size(), allocs); + format!( + "{} {}, {}", + ljustify2("movs".to_string(), ext_mode.to_string()), + src, + dst + ) + } - Inst::MovRM { size, src, dst, .. } => format!( - "{} {}, {}", - ljustify2("mov".to_string(), suffix_bwlq(*size)), - show_ireg_sized(src.to_reg(), mb_rru, size.to_bytes()), - dst.show_rru(mb_rru) - ), + Inst::MovRM { size, src, dst, .. } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = dst.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("mov".to_string(), suffix_bwlq(*size)), + src, + dst + ) + } Inst::ShiftR { size, kind, num_bits, + src, dst, .. - } => match num_bits.clone().to_imm8_reg() { - Imm8Reg::Reg { reg } => format!( - "{} {}, {}", - ljustify2(kind.to_string(), suffix_bwlq(*size)), - show_ireg_sized(reg, mb_rru, 1), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + match num_bits.clone().to_imm8_reg() { + Imm8Reg::Reg { reg } => { + let reg = pretty_print_reg(reg, 1, allocs); + format!( + "{} {}, {}, {}", + ljustify2(kind.to_string(), suffix_bwlq(*size)), + reg, + src, + dst, + ) + } - Imm8Reg::Imm8 { imm: num_bits } => format!( - "{} ${}, {}", - ljustify2(kind.to_string(), suffix_bwlq(*size)), - num_bits, - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), - }, + Imm8Reg::Imm8 { imm: num_bits } => format!( + "{} ${}, {}, {}", + ljustify2(kind.to_string(), suffix_bwlq(*size)), + num_bits, + src, + dst, + ), + } + } Inst::XmmRmiReg { - opcode, src2, dst, .. - } => format!( - "{} {}, {}", - ljustify(opcode.to_string()), - src2.show_rru(mb_rru), - dst.to_reg().show_rru(mb_rru) - ), + opcode, + src1, + src2, + dst, + .. 
+ } => { + let src1 = pretty_print_reg(src1.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src2 = src2.pretty_print(8, allocs); + format!( + "{} {}, {}, {}", + ljustify(opcode.to_string()), + src1, + src2, + dst, + ) + } Inst::CmpRmiR { size, @@ -1596,6 +1462,8 @@ impl PrettyPrint for Inst { dst, opcode, } => { + let dst = pretty_print_reg(dst.to_reg(), size.to_bytes(), allocs); + let src = src.pretty_print(size.to_bytes(), allocs); let op = match opcode { CmpOpcode::Cmp => "cmp", CmpOpcode::Test => "test", @@ -1603,67 +1471,79 @@ impl PrettyPrint for Inst { format!( "{} {}, {}", ljustify2(op.to_string(), suffix_bwlq(*size)), - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg(), mb_rru, size.to_bytes()) + src, + dst, ) } - Inst::Setcc { cc, dst } => format!( - "{} {}", - ljustify2("set".to_string(), cc.to_string()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 1) - ), + Inst::Setcc { cc, dst } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 1, allocs); + format!("{} {}", ljustify2("set".to_string(), cc.to_string()), dst) + } Inst::Cmove { size, cc, - consequent: src, - alternative: _, + consequent, + alternative, dst, - } => format!( - "{} {}, {}", - ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size))), - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + } => { + let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + let consequent = consequent.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}, {}", + ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size))), + consequent, + alternative, + dst, + ) + } Inst::XmmCmove { size, cc, - consequent: src, + consequent, + alternative, dst, .. } => { + let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + let consequent = consequent.pretty_print(size.to_bytes(), allocs); format!( - "j{} $next; mov{} {}, {}; $next: ", + "mov {}, {}; j{} $next; mov{} {}, {}; $next: ", cc.invert().to_string(), if *size == OperandSize::Size64 { "sd" } else { "ss" }, - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) + consequent, + dst, + alternative, + dst, ) } Inst::Push64 { src } => { - format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) + let src = src.pretty_print(8, allocs); + format!("{} {}", ljustify("pushq".to_string()), src) } Inst::Pop64 { dst } => { - format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru)) + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}", ljustify("popq".to_string()), dst) } Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest), - Inst::CallUnknown { dest, .. } => format!( - "{} *{}", - ljustify("call".to_string()), - dest.show_rru(mb_rru) - ), + Inst::CallUnknown { dest, .. } => { + let dest = dest.pretty_print(8, allocs); + format!("{} *{}", ljustify("call".to_string()), dest) + } - Inst::Ret => "ret".to_string(), + Inst::Ret { .. } => "ret".to_string(), Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), @@ -1689,14 +1569,14 @@ impl PrettyPrint for Inst { ), Inst::JmpTableSeq { idx, .. 
             } => {
-                format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru))
+                let idx = pretty_print_reg(*idx, 8, allocs);
+                format!("{} {}", ljustify("br_table".into()), idx)
             }

-            Inst::JmpUnknown { target } => format!(
-                "{} *{}",
-                ljustify("jmp".to_string()),
-                target.show_rru(mb_rru)
-            ),
+            Inst::JmpUnknown { target } => {
+                let target = target.pretty_print(8, allocs);
+                format!("{} *{}", ljustify("jmp".to_string()), target)
+            }

             Inst::TrapIf { cc, trap_code, .. } => {
                 format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
@@ -1704,26 +1584,37 @@
             Inst::LoadExtName {
                 dst, name, offset, ..
-            } => format!(
-                "{} {}+{}, {}",
-                ljustify("load_ext_name".into()),
-                name,
-                offset,
-                show_ireg_sized(dst.to_reg(), mb_rru, 8),
-            ),
+            } => {
+                let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
+                format!(
+                    "{} {}+{}, {}",
+                    ljustify("load_ext_name".into()),
+                    name,
+                    offset,
+                    dst,
+                )
+            }

             Inst::LockCmpxchg {
                 ty,
                 replacement,
+                expected,
                 mem,
+                dst_old,
                 ..
             } => {
                 let size = ty.bytes() as u8;
+                let replacement = pretty_print_reg(*replacement, size, allocs);
+                let expected = pretty_print_reg(*expected, size, allocs);
+                let dst_old = pretty_print_reg(dst_old.to_reg(), size, allocs);
+                let mem = mem.pretty_print(size, allocs);
                 format!(
-                    "lock cmpxchg{} {}, {}",
+                    "lock cmpxchg{} {}, {}, expected={}, dst_old={}",
                     suffix_bwlq(OperandSize::from_bytes(size as u32)),
-                    show_ireg_sized(*replacement, mb_rru, size),
-                    mem.show_rru(mb_rru)
+                    replacement,
+                    mem,
+                    expected,
+                    dst_old,
                 )
             }

@@ -1753,71 +1644,74 @@
                 format!("macho_tls_get_addr {:?}", symbol)
             }

-            Inst::ValueLabelMarker { label, reg } => {
-                format!("value_label {:?}, {}", label, reg.show_rru(mb_rru))
-            }
-
             Inst::Unwind { inst } => {
                 format!("unwind {:?}", inst)
             }
+
+            Inst::DummyUse { reg } => {
+                let reg = pretty_print_reg(*reg, 8, allocs);
+                format!("dummy_use {}", reg)
+            }
         }
     }
 }

-// Temp hook for legacy printing machinery
 impl fmt::Debug for Inst {
     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        // Print the insn without a Universe :-(
-        write!(fmt, "{}", self.show_rru(None))
+        write!(
+            fmt,
+            "{}",
+            self.pretty_print_inst(&[], &mut Default::default())
+        )
     }
 }

-fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
-    // This is a bit subtle. If some register is in the modified set, then it may not be in either
-    // the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
-    // regalloc.rs will "fix" this for us by removing the modified set from the use and def
-    // sets.
+fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
+    // FIXME: remove all remaining `mod` operands here to get to pure
+    // SSA.
+
+    // Note: because we need to statically know the indices of each
+    // reg in the operands list in order to fetch its allocation
+    // later, we put the variable-operand-count bits (the RegMem,
+    // RegMemImm, etc args) last. regalloc2 doesn't care what order
+    // the operands come in; they can be freely reordered.
+
+    // N.B.: we MUST keep the below in careful sync with (i) emission,
+    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
+    // method above.
     match inst {
         Inst::AluRmiR {
             src1, src2, dst, ..
         } => {
-            debug_assert_eq!(*src1, dst.to_reg());
             if inst.produces_const() {
-                // No need to account for src2, since src2 == dst.
- collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } else { - src2.get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src1.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + src2.get_operands(collector); } } Inst::Not { src, dst, .. } => { - debug_assert_eq!(*src, dst.to_reg()); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); } Inst::Neg { src, dst, .. } => { - debug_assert_eq!(*src, dst.to_reg()); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); } Inst::Div { - size, divisor, - dividend, + dividend_lo, + dividend_hi, dst_quotient, dst_remainder, .. } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - collector.add_mod(Writable::from_reg(regs::rax())); - - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); - if *size == OperandSize::Size8 { - collector.add_def(Writable::from_reg(regs::rdx())); - } else { - collector.add_mod(Writable::from_reg(regs::rdx())); - } - - divisor.get_regs_as_uses(collector); + collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); + collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); + collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); + collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); + divisor.get_operands(collector); } Inst::MulHi { src1, @@ -1826,54 +1720,52 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst_hi, .. } => { - debug_assert_eq!(*src1, regs::rax()); - debug_assert_eq!(dst_lo.to_reg(), regs::rax()); - collector.add_mod(Writable::from_reg(regs::rax())); - - debug_assert_eq!(dst_hi.to_reg(), regs::rdx()); - collector.add_def(Writable::from_reg(regs::rdx())); - - src2.get_regs_as_uses(collector); + collector.reg_fixed_use(src1.to_reg(), regs::rax()); + collector.reg_fixed_def(dst_lo.to_writable_reg(), regs::rax()); + collector.reg_fixed_def(dst_hi.to_writable_reg(), regs::rdx()); + src2.get_operands(collector); } Inst::CheckedDivOrRemSeq { divisor, - dividend, + dividend_lo, + dividend_hi, dst_quotient, dst_remainder, tmp, .. } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); - // Mark both fixed registers as mods, to avoid an early clobber problem in codegen - // (i.e. the temporary is allocated one of the fixed registers). This requires writing - // the rdx register *before* the instruction, which is not too bad. 
- collector.add_mod(Writable::from_reg(regs::rax())); - collector.add_mod(Writable::from_reg(regs::rdx())); - collector.add_mod(divisor.to_writable_reg()); + collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); + collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); + collector.reg_mod(divisor.to_writable_reg()); + collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); + collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); if let Some(tmp) = tmp { - collector.add_def(tmp.to_writable_reg()); + collector.reg_early_def(tmp.to_writable_reg()); } } Inst::SignExtendData { size, src, dst } => { - debug_assert_eq!(*src, regs::rax()); - debug_assert_eq!(dst.to_reg(), regs::rdx()); match size { - OperandSize::Size8 => collector.add_mod(Writable::from_reg(regs::rax())), + OperandSize::Size8 => { + // Note `rax` on both src and dest: 8->16 extend + // does AL -> AX. + collector.reg_fixed_use(src.to_reg(), regs::rax()); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rax()); + } _ => { - collector.add_use(regs::rax()); - collector.add_def(Writable::from_reg(regs::rdx())); + // All other widths do RAX -> RDX (AX -> DX:AX, + // EAX -> EDX:EAX). + collector.reg_fixed_use(src.to_reg(), regs::rax()); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rdx()); } } } Inst::UnaryRmR { src, dst, .. } => { - src.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmREvex { src, dst, .. } => { - src.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::XmmRmR { src1, @@ -1882,19 +1774,19 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { op, .. } => { - debug_assert_eq!(*src1, dst.to_reg()); if inst.produces_const() { - // No need to account for src, since src == dst. - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } else { - src2.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src1.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + src2.get_operands(collector); + // Some instructions have an implicit use of XMM0. if *op == SseOpcode::Blendvpd || *op == SseOpcode::Blendvps || *op == SseOpcode::Pblendvb { - collector.add_use(regs::xmm0()); + collector.reg_use(regs::xmm0()); } } } @@ -1905,12 +1797,12 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst, .. } => { - src1.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_use(src2.to_reg()); match *op { - Avx512Opcode::Vpermi2b => collector.add_mod(dst.to_writable_reg()), - _ => collector.add_def(dst.to_writable_reg()), + Avx512Opcode::Vpermi2b => collector.reg_mod(dst.to_writable_reg()), + _ => collector.reg_def(dst.to_writable_reg()), } + collector.reg_use(src2.to_reg()); + src1.get_operands(collector); } Inst::XmmRmRImm { op, @@ -1919,63 +1811,56 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst, .. } => { - debug_assert_eq!(*src1, dst.to_reg()); if inst.produces_const() { - // No need to account for src2, since src2 == dst. 
- debug_assert_eq!(src2.to_reg(), Some(dst.to_reg())); - collector.add_def(*dst); - } else if *op == SseOpcode::Pextrb - || *op == SseOpcode::Pextrw - || *op == SseOpcode::Pextrd - || *op == SseOpcode::Pshufd - || *op == SseOpcode::Roundss - || *op == SseOpcode::Roundsd - || *op == SseOpcode::Roundps - || *op == SseOpcode::Roundpd - { - src2.get_regs_as_uses(collector); - collector.add_def(*dst); + collector.reg_def(*dst); + } else if !op.uses_src1() { + // FIXME: split this instruction into two, so we don't + // need this awkward src1-is-only-sometimes-an-arg + // behavior. + collector.reg_def(*dst); + src2.get_operands(collector); } else { - src2.get_regs_as_uses(collector); - collector.add_mod(*dst); + collector.reg_use(*src1); + collector.reg_reuse_def(*dst, 0); + src2.get_operands(collector); } } - Inst::XmmUninitializedValue { dst } => collector.add_def(dst.to_writable_reg()), - Inst::XmmLoadConst { dst, .. } => collector.add_def(*dst), + Inst::XmmUninitializedValue { dst } => collector.reg_def(dst.to_writable_reg()), + Inst::XmmLoadConst { dst, .. } => collector.reg_def(*dst), Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => { - debug_assert_eq!(*rhs, dst.to_reg()); - collector.add_use(lhs.to_reg()); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(rhs.to_reg()); + collector.reg_use(lhs.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); // Reuse RHS. } Inst::XmmRmiReg { src1, src2, dst, .. } => { - debug_assert_eq!(*src1, dst.to_reg()); - src2.clone().to_reg_mem_imm().get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src1.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); // Reuse RHS. + src2.get_operands(collector); } Inst::XmmMovRM { src, dst, .. } => { - collector.add_use(*src); - dst.get_regs_as_uses(collector); + collector.reg_use(*src); + dst.get_operands(collector); } Inst::XmmCmpRmR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_use(dst.to_reg()); + collector.reg_use(dst.to_reg()); + src.get_operands(collector); } Inst::Imm { dst, .. } => { - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::MovRR { src, dst, .. } => { - collector.add_use(src.to_reg()); - collector.add_def(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::XmmToGpr { src, dst, .. } => { - collector.add_use(src.to_reg()); - collector.add_def(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::GprToXmm { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::CvtUint64ToFloatSeq { src, @@ -1984,10 +1869,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { tmp_gpr2, .. } => { - collector.add_mod(src.to_writable_reg()); - collector.add_def(dst.to_writable_reg()); - collector.add_def(tmp_gpr1.to_writable_reg()); - collector.add_def(tmp_gpr2.to_writable_reg()); + collector.reg_mod(src.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + collector.reg_early_def(tmp_gpr1.to_writable_reg()); + collector.reg_early_def(tmp_gpr2.to_writable_reg()); } Inst::CvtFloatToSintSeq { src, @@ -2003,74 +1888,86 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { tmp_xmm, .. 
} => { - collector.add_mod(src.to_writable_reg()); - collector.add_def(dst.to_writable_reg()); - collector.add_def(tmp_gpr.to_writable_reg()); - collector.add_def(tmp_xmm.to_writable_reg()); + collector.reg_mod(src.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + collector.reg_early_def(tmp_gpr.to_writable_reg()); + collector.reg_early_def(tmp_xmm.to_writable_reg()); } Inst::MovzxRmR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::Mov64MR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()) + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::LoadEffectiveAddress { addr: src, dst } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()) + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::MovsxRmR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::MovRM { src, dst, .. } => { - collector.add_use(src.to_reg()); - dst.get_regs_as_uses(collector); + collector.reg_use(src.to_reg()); + dst.get_operands(collector); } - Inst::ShiftR { num_bits, dst, .. } => { + Inst::ShiftR { + num_bits, src, dst, .. + } => { + collector.reg_use(src.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); if let Imm8Reg::Reg { reg } = num_bits.clone().to_imm8_reg() { - debug_assert_eq!(reg, regs::rcx()); - collector.add_use(regs::rcx()); + collector.reg_fixed_use(reg, regs::rcx()); } - collector.add_mod(dst.to_writable_reg()); } Inst::CmpRmiR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_use(dst.to_reg()); // yes, really `add_use` + // N.B.: use, not def (cmp doesn't write its result). + collector.reg_use(dst.to_reg()); + src.get_operands(collector); } Inst::Setcc { dst, .. } => { - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::Cmove { - consequent: src, + consequent, + alternative, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(alternative.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + consequent.get_operands(collector); } Inst::XmmCmove { - consequent: src, + consequent, + alternative, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(alternative.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + consequent.get_operands(collector); } Inst::Push64 { src } => { - src.get_regs_as_uses(collector); - collector.add_mod(Writable::from_reg(regs::rsp())); + src.get_operands(collector); } Inst::Pop64 { dst } => { - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::CallKnown { ref uses, ref defs, .. } => { - collector.add_uses(uses); - collector.add_defs(defs); + for &u in uses { + collector.reg_use(u); + } + for &d in defs { + collector.reg_def(d); + } + // FIXME: keep clobbers separate in the Inst and use + // `reg_clobber()`. } Inst::CallUnknown { @@ -2079,9 +1976,15 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dest, .. 
} => { - collector.add_uses(uses); - collector.add_defs(defs); - dest.get_regs_as_uses(collector); + dest.get_operands(collector); + for &u in uses { + collector.reg_use(u); + } + for &d in defs { + collector.reg_def(d); + } + // FIXME: keep clobbers separate in the Inst and use + // `reg_clobber()`. } Inst::JmpTableSeq { @@ -2090,17 +1993,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { ref tmp2, .. } => { - collector.add_use(*idx); - collector.add_def(*tmp1); - collector.add_def(*tmp2); + collector.reg_use(*idx); + collector.reg_early_def(*tmp1); + collector.reg_early_def(*tmp2); } Inst::JmpUnknown { target } => { - target.get_regs_as_uses(collector); + target.get_operands(collector); } Inst::LoadExtName { dst, .. } => { - collector.add_def(*dst); + collector.reg_def(*dst); } Inst::LockCmpxchg { @@ -2110,23 +2013,30 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst_old, .. } => { - mem.get_regs_as_uses(collector); - collector.add_use(*replacement); - - debug_assert_eq!(*expected, regs::rax()); - debug_assert_eq!(dst_old.to_reg(), regs::rax()); - collector.add_mod(Writable::from_reg(regs::rax())); + collector.reg_use(*replacement); + collector.reg_fixed_use(*expected, regs::rax()); + collector.reg_fixed_def(*dst_old, regs::rax()); + mem.get_operands(collector); } Inst::AtomicRmwSeq { .. } => { - collector.add_use(regs::r9()); - collector.add_use(regs::r10()); - collector.add_def(Writable::from_reg(regs::r11())); - collector.add_def(Writable::from_reg(regs::rax())); + // FIXME: take vreg args, not fixed regs, and just use + // reg_fixed_use here. + collector.reg_use(regs::r9()); + collector.reg_use(regs::r10()); + collector.reg_def(Writable::from_reg(regs::r11())); + collector.reg_def(Writable::from_reg(regs::rax())); } - Inst::Ret - | Inst::EpiloguePlaceholder + Inst::Ret { rets } => { + // The return value(s) are live-out; we represent this + // with register uses on the return instruction. + for &ret in rets { + collector.reg_use(ret); + } + } + + Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } | Inst::JmpIf { .. } | Inst::JmpCond { .. } @@ -2147,457 +2057,15 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // ELF systems; other x86-64 targets with other conventions (i.e., // Windows) use different TLS strategies. for reg in X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV) { - collector.add_def(reg); + // FIXME: use actual clobber functionality. + collector.reg_def(reg); } } - Inst::ValueLabelMarker { reg, .. } => { - collector.add_use(*reg); - } - Inst::Unwind { .. } => {} - } -} -//============================================================================= -// Instructions and subcomponents: map_regs - -impl Amode { - fn map_uses(&mut self, map: &RM) { - match self { - Amode::ImmReg { ref mut base, .. } => map.map_use(base), - Amode::ImmRegRegShift { - ref mut base, - ref mut index, - .. - } => { - base.map_use(map); - index.map_use(map); - } - Amode::RipRelative { .. } => { - // RIP isn't involved in regalloc. - } - } - } - - /// Offset the amode by a fixed offset. - pub(crate) fn offset(&self, offset: u32) -> Self { - let mut ret = self.clone(); - match &mut ret { - &mut Amode::ImmReg { ref mut simm32, .. } => *simm32 += offset, - &mut Amode::ImmRegRegShift { ref mut simm32, .. 
} => *simm32 += offset, - _ => panic!("Cannot offset amode: {:?}", self), - } - ret - } -} - -impl RegMemImm { - fn map_uses(&mut self, map: &RM) { - match self { - RegMemImm::Reg { ref mut reg } => map.map_use(reg), - RegMemImm::Mem { ref mut addr } => addr.map_uses(map), - RegMemImm::Imm { .. } => {} - } - } - - fn map_as_def(&mut self, mapper: &RM) { - match self { - Self::Reg { reg } => { - let mut writable_src = Writable::from_reg(*reg); - mapper.map_def(&mut writable_src); - *self = Self::reg(writable_src.to_reg()); - } - _ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"), - } - } -} - -impl RegMem { - fn map_uses(&mut self, map: &RM) { - match self { - RegMem::Reg { ref mut reg } => map.map_use(reg), - RegMem::Mem { ref mut addr, .. } => addr.map_uses(map), - } - } - - fn map_as_def(&mut self, mapper: &RM) { - match self { - Self::Reg { reg } => { - let mut writable_src = Writable::from_reg(*reg); - mapper.map_def(&mut writable_src); - *self = Self::reg(writable_src.to_reg()); - } - _ => panic!("unexpected RegMem kind in map_src_reg_as_def"), - } - } -} - -pub(crate) fn x64_map_regs(inst: &mut Inst, mapper: &RM) { - // Note this must be carefully synchronized with x64_get_regs. - let produces_const = inst.produces_const(); - - match inst { - // ** Nop - Inst::AluRmiR { - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - if produces_const { - src2.map_as_def(mapper); - dst.map_def(mapper); - *src1 = dst.to_reg(); - } else { - src2.map_uses(mapper); - dst.map_mod(mapper); - *src1 = dst.to_reg(); - } - } - Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => { - debug_assert_eq!(*src, dst.to_reg()); - dst.map_mod(mapper); - *src = dst.to_reg(); - } - Inst::Div { divisor, .. } => divisor.map_uses(mapper), - Inst::MulHi { src2, .. } => src2.map_uses(mapper), - Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => { - divisor.map_mod(mapper); - if let Some(tmp) = tmp { - tmp.map_def(mapper) - } - } - Inst::SignExtendData { .. } => {} - Inst::XmmUnaryRmR { - ref mut src, - ref mut dst, - .. - } - | Inst::XmmUnaryRmREvex { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::UnaryRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::XmmRmRImm { - ref op, - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - if produces_const { - src2.map_as_def(mapper); - mapper.map_def(dst); - *src1 = dst.to_reg(); - } else if *op == SseOpcode::Pextrb - || *op == SseOpcode::Pextrw - || *op == SseOpcode::Pextrd - || *op == SseOpcode::Pshufd - || *op == SseOpcode::Roundss - || *op == SseOpcode::Roundsd - || *op == SseOpcode::Roundps - || *op == SseOpcode::Roundpd - { - src2.map_uses(mapper); - mapper.map_def(dst); - *src1 = dst.to_reg(); - } else { - src2.map_uses(mapper); - mapper.map_mod(dst); - *src1 = dst.to_reg(); - } - } - Inst::XmmRmR { - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - if produces_const { - src2.map_as_def(mapper); - dst.map_def(mapper); - *src1 = dst.to_reg(); - } else { - src2.map_uses(mapper); - dst.map_mod(mapper); - *src1 = dst.to_reg(); - } - } - Inst::XmmRmREvex { - op, - ref mut src1, - ref mut src2, - ref mut dst, - .. 
- } => { - src1.map_uses(mapper); - src2.map_use(mapper); - match *op { - Avx512Opcode::Vpermi2b => dst.map_mod(mapper), - _ => dst.map_def(mapper), - } - } - Inst::XmmRmiReg { - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - src2.map_uses(mapper); - dst.map_mod(mapper); - *src1 = dst.to_reg(); - } - Inst::XmmUninitializedValue { ref mut dst, .. } => { - dst.map_def(mapper); - } - Inst::XmmLoadConst { ref mut dst, .. } => { - mapper.map_def(dst); - } - Inst::XmmMinMaxSeq { - ref mut lhs, - ref mut rhs, - ref mut dst, - .. - } => { - debug_assert_eq!(*rhs, dst.to_reg()); - lhs.map_use(mapper); - dst.map_mod(mapper); - *rhs = dst.to_reg(); - } - Inst::XmmMovRM { - ref mut src, - ref mut dst, - .. - } => { - mapper.map_use(src); - dst.map_uses(mapper); - } - Inst::XmmCmpRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_use(mapper); - } - Inst::Imm { ref mut dst, .. } => dst.map_def(mapper), - Inst::MovRR { - ref mut src, - ref mut dst, - .. - } => { - src.map_use(mapper); - dst.map_def(mapper); - } - Inst::XmmToGpr { - ref mut src, - ref mut dst, - .. - } => { - src.map_use(mapper); - dst.map_def(mapper); - } - Inst::GprToXmm { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::CvtUint64ToFloatSeq { - ref mut src, - ref mut dst, - ref mut tmp_gpr1, - ref mut tmp_gpr2, - .. - } => { - src.map_mod(mapper); - dst.map_def(mapper); - tmp_gpr1.map_def(mapper); - tmp_gpr2.map_def(mapper); - } - Inst::CvtFloatToSintSeq { - ref mut src, - ref mut dst, - ref mut tmp_xmm, - ref mut tmp_gpr, - .. - } - | Inst::CvtFloatToUintSeq { - ref mut src, - ref mut dst, - ref mut tmp_gpr, - ref mut tmp_xmm, - .. - } => { - src.map_mod(mapper); - dst.map_def(mapper); - tmp_gpr.map_def(mapper); - tmp_xmm.map_def(mapper); - } - Inst::MovzxRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::Mov64MR { src, dst, .. } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::LoadEffectiveAddress { addr: src, dst } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::MovsxRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::MovRM { - ref mut src, - ref mut dst, - .. - } => { - src.map_use(mapper); - dst.map_uses(mapper); - } - Inst::ShiftR { - ref mut src, - ref mut dst, - .. - } => { - debug_assert_eq!(*src, dst.to_reg()); - dst.map_mod(mapper); - *src = dst.to_reg(); - } - Inst::CmpRmiR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_use(mapper); - } - Inst::Setcc { ref mut dst, .. } => dst.map_def(mapper), - Inst::Cmove { - consequent: ref mut src, - ref mut dst, - ref mut alternative, - .. - } => { - src.map_uses(mapper); - dst.map_mod(mapper); - *alternative = dst.to_reg(); - } - Inst::XmmCmove { - consequent: ref mut src, - ref mut dst, - ref mut alternative, - .. - } => { - src.map_uses(mapper); - dst.map_mod(mapper); - *alternative = dst.to_reg(); - } - Inst::Push64 { ref mut src } => src.map_uses(mapper), - Inst::Pop64 { ref mut dst } => { - dst.map_def(mapper); - } - - Inst::CallKnown { - ref mut uses, - ref mut defs, - .. - } => { - for r in uses.iter_mut() { - mapper.map_use(r); - } - for r in defs.iter_mut() { - mapper.map_def(r); - } - } - - Inst::CallUnknown { - ref mut uses, - ref mut defs, - ref mut dest, - .. 
-        } => {
-            for r in uses.iter_mut() {
-                mapper.map_use(r);
-            }
-            for r in defs.iter_mut() {
-                mapper.map_def(r);
-            }
-            dest.map_uses(mapper);
-        }
-
-        Inst::JmpTableSeq {
-            ref mut idx,
-            ref mut tmp1,
-            ref mut tmp2,
-            ..
-        } => {
-            mapper.map_use(idx);
-            mapper.map_def(tmp1);
-            mapper.map_def(tmp2);
-        }
-
-        Inst::JmpUnknown { ref mut target } => target.map_uses(mapper),
-
-        Inst::LoadExtName { ref mut dst, .. } => mapper.map_def(dst),
-
-        Inst::LockCmpxchg {
-            ref mut replacement,
-            ref mut mem,
-            ..
-        } => {
-            mapper.map_use(replacement);
-            mem.map_uses(mapper);
-        }
-
-        Inst::ValueLabelMarker { ref mut reg, .. } => mapper.map_use(reg),
-
-        Inst::Ret
-        | Inst::EpiloguePlaceholder
-        | Inst::JmpKnown { .. }
-        | Inst::JmpCond { .. }
-        | Inst::JmpIf { .. }
-        | Inst::Nop { .. }
-        | Inst::TrapIf { .. }
-        | Inst::VirtualSPOffsetAdj { .. }
-        | Inst::Ud2 { .. }
-        | Inst::Hlt
-        | Inst::AtomicRmwSeq { .. }
-        | Inst::ElfTlsGetAddr { .. }
-        | Inst::MachOTlsGetAddr { .. }
-        | Inst::Fence { .. }
-        | Inst::Unwind { .. } => {
-            // Instruction doesn't explicitly mention any regs, so it can't have any virtual
-            // regs that we'd need to remap. Hence no action required.
+        Inst::DummyUse { reg } => {
+            collector.reg_use(*reg);
         }
     }
 }
@@ -2606,15 +2074,8 @@ pub(crate) fn x64_map_regs(inst: &mut Inst, mapper: &RM) {
 // Instructions: misc functions and external interface

 impl MachInst for Inst {
-    fn get_regs(&self, collector: &mut RegUsageCollector) {
-        x64_get_regs(&self, collector)
-    }
-
-    fn map_regs<RUM>(&mut self, mapper: &RUM)
-    where
-        RUM: regalloc::RegUsageMapper,
-    {
-        x64_map_regs(self, mapper);
+    fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) {
+        x64_get_operands(&self, collector)
     }

     fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
@@ -2661,7 +2122,7 @@ impl MachInst for Inst {
     fn is_term<'a>(&'a self) -> MachTerminator<'a> {
         match self {
             // Interesting cases.
-            &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
+            &Self::Ret { .. } | &Self::EpiloguePlaceholder => MachTerminator::Ret,
             &Self::JmpKnown { dst } => MachTerminator::Uncond(dst),
             &Self::JmpCond {
                 taken, not_taken, ..
@@ -2675,28 +2136,6 @@ impl MachInst for Inst {
         }
     }

-    fn stack_op_info(&self) -> Option<MachInstStackOpInfo> {
-        match self {
-            Self::VirtualSPOffsetAdj { offset } => Some(MachInstStackOpInfo::NomSPAdj(*offset)),
-            Self::MovRM {
-                size: OperandSize::Size8,
-                src,
-                dst: SyntheticAmode::NominalSPOffset { simm32 },
-            } => Some(MachInstStackOpInfo::StoreNomSPOff(
-                src.to_reg(),
-                *simm32 as i64,
-            )),
-            Self::Mov64MR {
-                src: SyntheticAmode::NominalSPOffset { simm32 },
-                dst,
-            } => Some(MachInstStackOpInfo::LoadNomSPOff(
-                dst.to_reg().to_reg(),
-                *simm32 as i64,
-            )),
-            _ => None,
-        }
-    }
-
     fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
         log::trace!(
             "Inst::gen_move {:?} -> {:?} (type: {:?})",
@@ -2704,13 +2143,13 @@ impl MachInst for Inst {
             dst_reg.to_reg(),
             ty
         );
-        let rc_dst = dst_reg.to_reg().get_class();
-        let rc_src = src_reg.get_class();
+        let rc_dst = dst_reg.to_reg().class();
+        let rc_src = src_reg.class();
         // If this isn't true, we have gone way off the rails.
         debug_assert!(rc_dst == rc_src);
         match rc_dst {
-            RegClass::I64 => Inst::mov_r_r(OperandSize::Size64, src_reg, dst_reg),
-            RegClass::V128 => {
+            RegClass::Int => Inst::mov_r_r(OperandSize::Size64, src_reg, dst_reg),
+            RegClass::Float => {
                 // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                 // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
                 // those, which may write more lanes that we need, but are specified to have
@@ -2723,7 +2162,6 @@ impl MachInst for Inst {
                 };
                 Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg)
             }
-            _ => panic!("gen_move(x64): unhandled regclass {:?}", rc_dst),
         }
     }
@@ -2731,32 +2169,28 @@ impl MachInst for Inst {
         Inst::nop(std::cmp::min(preferred_size, 15) as u8)
     }

-    fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
-        None
-    }
-
     fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
         match ty {
-            types::I8 => Ok((&[RegClass::I64], &[types::I8])),
-            types::I16 => Ok((&[RegClass::I64], &[types::I16])),
-            types::I32 => Ok((&[RegClass::I64], &[types::I32])),
-            types::I64 => Ok((&[RegClass::I64], &[types::I64])),
-            types::B1 => Ok((&[RegClass::I64], &[types::B1])),
-            types::B8 => Ok((&[RegClass::I64], &[types::B8])),
-            types::B16 => Ok((&[RegClass::I64], &[types::B16])),
-            types::B32 => Ok((&[RegClass::I64], &[types::B32])),
-            types::B64 => Ok((&[RegClass::I64], &[types::B64])),
+            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
+            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
+            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
+            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
+            types::B1 => Ok((&[RegClass::Int], &[types::B1])),
+            types::B8 => Ok((&[RegClass::Int], &[types::B8])),
+            types::B16 => Ok((&[RegClass::Int], &[types::B16])),
+            types::B32 => Ok((&[RegClass::Int], &[types::B32])),
+            types::B64 => Ok((&[RegClass::Int], &[types::B64])),
             types::R32 => panic!("32-bit reftype pointer should never be seen on x86-64"),
-            types::R64 => Ok((&[RegClass::I64], &[types::R64])),
-            types::F32 => Ok((&[RegClass::V128], &[types::F32])),
-            types::F64 => Ok((&[RegClass::V128], &[types::F64])),
-            types::I128 => Ok((&[RegClass::I64, RegClass::I64], &[types::I64, types::I64])),
-            types::B128 => Ok((&[RegClass::I64, RegClass::I64], &[types::B64, types::B64])),
+            types::R64 => Ok((&[RegClass::Int], &[types::R64])),
+            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
+            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
+            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
+            types::B128 => Ok((&[RegClass::Int, RegClass::Int], &[types::B64, types::B64])),
             _ if ty.is_vector() => {
                 assert!(ty.bits() <= 128);
-                Ok((&[RegClass::V128], &[types::I8X16]))
+                Ok((&[RegClass::Float], &[types::I8X16]))
             }
-            types::IFLAGS | types::FFLAGS => Ok((&[RegClass::I64], &[types::I64])),
+            types::IFLAGS | types::FFLAGS => Ok((&[RegClass::Int], &[types::I64])),
             _ => Err(CodegenError::Unsupported(format!(
                 "Unexpected SSA-value type: {}",
                 ty
@@ -2764,6 +2198,13 @@
         }
     }

+    fn canonical_type_for_rc(rc: RegClass) -> Type {
+        match rc {
+            RegClass::Float => types::I8X16,
+            RegClass::Int => types::I64,
+        }
+    }
+
     fn gen_jump(label: MachLabel) -> Inst {
         Inst::jmp_known(label)
     }
@@ -2877,22 +2318,25 @@ impl MachInst for Inst {
         ret
     }

+    fn gen_dummy_use(reg: Reg) -> Self {
+        Inst::DummyUse { reg }
+    }
+
     fn worst_case_size() -> CodeOffset {
         15
     }

     fn ref_type_regclass(_: &settings::Flags) -> RegClass {
-        RegClass::I64
+        RegClass::Int
     }

-    fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self {
-        Inst::ValueLabelMarker { label, reg }
-    }
-
-    fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
+    fn is_safepoint(&self) -> bool {
         match self {
-            Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)),
-            _ => None,
+            Inst::CallKnown { .. }
+            | Inst::CallUnknown { .. }
+            | Inst::TrapIf { .. }
+            | Inst::Ud2 { .. } => true,
+            _ => false,
         }
     }

@@ -2929,12 +2373,19 @@ impl MachInstEmit for Inst {
     type State = EmitState;
     type Info = EmitInfo;

-    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
-        emit::emit(self, sink, info, state);
+    fn emit(
+        &self,
+        allocs: &[Allocation],
+        sink: &mut MachBuffer<Inst>,
+        info: &Self::Info,
+        state: &mut Self::State,
+    ) {
+        let mut allocs = AllocationConsumer::new(allocs);
+        emit::emit(self, &mut allocs, sink, info, state);
     }

-    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String {
-        self.show_rru(mb_rru)
+    fn pretty_print_inst(&self, allocs: &[Allocation], _: &mut Self::State) -> String {
+        PrettyPrint::pretty_print(self, 0, &mut AllocationConsumer::new(allocs))
     }
 }
diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs
index ddcf3adc2d..d720951f57 100644
--- a/cranelift/codegen/src/isa/x64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/x64/inst/regs.rs
@@ -1,26 +1,14 @@
-//! Registers, the Universe thereof, and printing.
+//! Register definitions for regalloc2.
 //!
-//! These are ordered by sequence number, as required in the Universe.
+//! We define 16 GPRs, with indices equal to the hardware encoding,
+//! and 16 XMM registers.
 //!
-//! The caller-saved registers are placed first in order to prefer not to clobber (requiring
-//! saves/restores in prologue/epilogue code) when possible. Note that there is no other heuristic
-//! in the backend that will apply such pressure; the register allocator's cost heuristics are not
-//! aware of the cost of clobber-save/restore code.
-//!
-//! One might worry that this pessimizes code with many callsites, where using caller-saves causes
-//! us to have to save them (as we are the caller) frequently. However, the register allocator
-//! *should be* aware of *this* cost, because it sees that the call instruction modifies all of the
-//! caller-saved (i.e., callee-clobbered) registers.
-//!
-//! Hence, this ordering encodes pressure in one direction (prefer not to clobber registers that we
-//! ourselves have to save) and this is balanaced against the RA's pressure in the other direction
-//! at callsites.
+//! Note also that we make use of pinned VRegs to refer to PRegs.

+use crate::machinst::{AllocationConsumer, RealReg, Reg};
 use crate::settings;
-use alloc::vec::Vec;
-use regalloc::{
-    PrettyPrint, RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES,
-};
+use alloc::string::ToString;
+use regalloc2::{MachineEnv, PReg, RegClass, VReg};
 use std::string::String;

 // Hardware encodings (note the special rax, rcx, rdx, rbx order).
@@ -42,53 +30,62 @@
 pub const ENC_R13: u8 = 13;
 pub const ENC_R14: u8 = 14;
 pub const ENC_R15: u8 = 15;

-fn gpr(enc: u8, index: u8) -> Reg {
-    Reg::new_real(RegClass::I64, enc, index)
+// Constructors for Regs.
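Aside on the pinned-vreg convention noted in the module comment above — a minimal sketch for orientation, not part of the patch itself. A "real" register is simply a VReg whose index aliases the corresponding PReg, so no separate register universe is needed. This mirrors the `gpr()`/`fpr()` constructors that follow, using only regalloc2 calls that already appear in this diff:

    use regalloc2::{PReg, RegClass, VReg};

    // %rax is hardware encoding 0 in the integer class.
    let rax = PReg::new(0, RegClass::Int);
    // The pinned vreg referring to it, exactly as `gpr(ENC_RAX)` builds one.
    let pinned = VReg::new(rax.index(), RegClass::Int);
    assert_eq!(pinned.vreg(), rax.index()); // the vreg index aliases the preg index
    assert_eq!(pinned.class(), RegClass::Int);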
+ +fn gpr(enc: u8) -> Reg { + let preg = PReg::new(enc as usize, RegClass::Int); + Reg::from(VReg::new(preg.index(), RegClass::Int)) } pub(crate) fn rsi() -> Reg { - gpr(ENC_RSI, 16) + gpr(ENC_RSI) } pub(crate) fn rdi() -> Reg { - gpr(ENC_RDI, 17) + gpr(ENC_RDI) } pub(crate) fn rax() -> Reg { - gpr(ENC_RAX, 18) + gpr(ENC_RAX) } pub(crate) fn rcx() -> Reg { - gpr(ENC_RCX, 19) + gpr(ENC_RCX) } pub(crate) fn rdx() -> Reg { - gpr(ENC_RDX, 20) + gpr(ENC_RDX) } pub(crate) fn r8() -> Reg { - gpr(ENC_R8, 21) + gpr(ENC_R8) } pub(crate) fn r9() -> Reg { - gpr(ENC_R9, 22) + gpr(ENC_R9) } pub(crate) fn r10() -> Reg { - gpr(ENC_R10, 23) + gpr(ENC_R10) } pub(crate) fn r11() -> Reg { - gpr(ENC_R11, 24) + gpr(ENC_R11) } pub(crate) fn r12() -> Reg { - gpr(ENC_R12, 25) + gpr(ENC_R12) } pub(crate) fn r13() -> Reg { - gpr(ENC_R13, 26) + gpr(ENC_R13) } pub(crate) fn r14() -> Reg { - gpr(ENC_R14, 27) + gpr(ENC_R14) } pub(crate) fn rbx() -> Reg { - gpr(ENC_RBX, 28) + gpr(ENC_RBX) } pub(crate) fn r15() -> Reg { - // r15 is put aside since this is the pinned register. - gpr(ENC_R15, 29) + gpr(ENC_R15) +} + +pub(crate) fn rsp() -> Reg { + gpr(ENC_RSP) +} +pub(crate) fn rbp() -> Reg { + gpr(ENC_RBP) } /// The pinned register on this architecture. @@ -98,163 +95,177 @@ pub(crate) fn pinned_reg() -> Reg { r15() } -fn fpr(enc: u8, index: u8) -> Reg { - Reg::new_real(RegClass::V128, enc, index) +fn fpr(enc: u8) -> Reg { + let preg = PReg::new(enc as usize, RegClass::Float); + Reg::from(VReg::new(preg.index(), RegClass::Float)) } pub(crate) fn xmm0() -> Reg { - fpr(0, 0) + fpr(0) } pub(crate) fn xmm1() -> Reg { - fpr(1, 1) + fpr(1) } pub(crate) fn xmm2() -> Reg { - fpr(2, 2) + fpr(2) } pub(crate) fn xmm3() -> Reg { - fpr(3, 3) + fpr(3) } pub(crate) fn xmm4() -> Reg { - fpr(4, 4) + fpr(4) } pub(crate) fn xmm5() -> Reg { - fpr(5, 5) + fpr(5) } pub(crate) fn xmm6() -> Reg { - fpr(6, 6) + fpr(6) } pub(crate) fn xmm7() -> Reg { - fpr(7, 7) + fpr(7) } pub(crate) fn xmm8() -> Reg { - fpr(8, 8) + fpr(8) } pub(crate) fn xmm9() -> Reg { - fpr(9, 9) + fpr(9) } pub(crate) fn xmm10() -> Reg { - fpr(10, 10) + fpr(10) } pub(crate) fn xmm11() -> Reg { - fpr(11, 11) + fpr(11) } pub(crate) fn xmm12() -> Reg { - fpr(12, 12) + fpr(12) } pub(crate) fn xmm13() -> Reg { - fpr(13, 13) + fpr(13) } pub(crate) fn xmm14() -> Reg { - fpr(14, 14) + fpr(14) } pub(crate) fn xmm15() -> Reg { - fpr(15, 15) + fpr(15) } -pub(crate) fn rsp() -> Reg { - gpr(ENC_RSP, 30) -} -pub(crate) fn rbp() -> Reg { - gpr(ENC_RBP, 31) -} - -/// Create the register universe for X64. -/// -/// The ordering of registers matters, as commented in the file doc comment: assumes the -/// calling-convention is SystemV, at the moment. 
-pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse { - let mut regs = Vec::<(RealReg, String)>::new(); - let mut allocable_by_class = [None; NUM_REG_CLASSES]; - - let use_pinned_reg = flags.enable_pinned_reg(); - - // XMM registers - let first_fpr = regs.len(); - regs.push((xmm0().to_real_reg(), "%xmm0".into())); - regs.push((xmm1().to_real_reg(), "%xmm1".into())); - regs.push((xmm2().to_real_reg(), "%xmm2".into())); - regs.push((xmm3().to_real_reg(), "%xmm3".into())); - regs.push((xmm4().to_real_reg(), "%xmm4".into())); - regs.push((xmm5().to_real_reg(), "%xmm5".into())); - regs.push((xmm6().to_real_reg(), "%xmm6".into())); - regs.push((xmm7().to_real_reg(), "%xmm7".into())); - regs.push((xmm8().to_real_reg(), "%xmm8".into())); - regs.push((xmm9().to_real_reg(), "%xmm9".into())); - regs.push((xmm10().to_real_reg(), "%xmm10".into())); - regs.push((xmm11().to_real_reg(), "%xmm11".into())); - regs.push((xmm12().to_real_reg(), "%xmm12".into())); - regs.push((xmm13().to_real_reg(), "%xmm13".into())); - regs.push((xmm14().to_real_reg(), "%xmm14".into())); - regs.push((xmm15().to_real_reg(), "%xmm15".into())); - let last_fpr = regs.len() - 1; - - // Integer regs. - let first_gpr = regs.len(); - - // Caller-saved, in the SystemV x86_64 ABI. - regs.push((rsi().to_real_reg(), "%rsi".into())); - regs.push((rdi().to_real_reg(), "%rdi".into())); - regs.push((rax().to_real_reg(), "%rax".into())); - regs.push((rcx().to_real_reg(), "%rcx".into())); - regs.push((rdx().to_real_reg(), "%rdx".into())); - regs.push((r8().to_real_reg(), "%r8".into())); - regs.push((r9().to_real_reg(), "%r9".into())); - regs.push((r10().to_real_reg(), "%r10".into())); - regs.push((r11().to_real_reg(), "%r11".into())); - - // Callee-saved, in the SystemV x86_64 ABI. - regs.push((r12().to_real_reg(), "%r12".into())); - regs.push((r13().to_real_reg(), "%r13".into())); - regs.push((r14().to_real_reg(), "%r14".into())); - - regs.push((rbx().to_real_reg(), "%rbx".into())); - - // Other regs, not available to the allocator. - debug_assert_eq!(r15(), pinned_reg()); - let allocable = if use_pinned_reg { - // The pinned register is not allocatable in this case, so record the length before adding - // it. - let len = regs.len(); - regs.push((r15().to_real_reg(), "%r15/pinned".into())); - len - } else { - regs.push((r15().to_real_reg(), "%r15".into())); - regs.len() - }; - let last_gpr = allocable - 1; - - regs.push((rsp().to_real_reg(), "%rsp".into())); - regs.push((rbp().to_real_reg(), "%rbp".into())); - - allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { - first: first_gpr, - last: last_gpr, - suggested_scratch: Some(r12().get_index()), - }); - allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo { - first: first_fpr, - last: last_fpr, - suggested_scratch: Some(xmm15().get_index()), - }); - - // Sanity-check: the index passed to the Reg ctor must match the order in the register list. - for (i, reg) in regs.iter().enumerate() { - assert_eq!(i, reg.0.get_index()); +/// Create the register environment for x64. +pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv { + fn preg(r: Reg) -> PReg { + r.to_real_reg().unwrap().into() } - RealRegUniverse { - regs, - allocable, - allocable_by_class, + let mut env = MachineEnv { + preferred_regs_by_class: [ + // Preferred GPRs: caller-saved in the SysV ABI. 
+ vec![ + preg(rsi()), + preg(rdi()), + preg(rax()), + preg(rcx()), + preg(rdx()), + preg(r8()), + preg(r9()), + // N.B.: not r10; it is our scratch reg. + preg(r11()), + ], + // Preferred XMMs: all of them. + vec![ + preg(xmm0()), + preg(xmm1()), + preg(xmm2()), + preg(xmm3()), + preg(xmm4()), + preg(xmm5()), + preg(xmm6()), + preg(xmm7()), + preg(xmm8()), + preg(xmm9()), + preg(xmm10()), + preg(xmm11()), + preg(xmm12()), + preg(xmm13()), + preg(xmm14()), + // N.B.: not xmm15; it is our scratch reg. + ], + ], + non_preferred_regs_by_class: [ + // Non-preferred GPRs: callee-saved in the SysV ABI. + vec![preg(rbx()), preg(r12()), preg(r13()), preg(r14())], + // Non-preferred XMMs: none. + vec![], + ], + scratch_by_class: [preg(r10()), preg(xmm15())], + fixed_stack_slots: vec![], + }; + + debug_assert_eq!(r15(), pinned_reg()); + if !flags.enable_pinned_reg() { + env.non_preferred_regs_by_class[0].push(preg(r15())); + } + + env +} + +/// Give the name of a RealReg. +pub fn realreg_name(reg: RealReg) -> &'static str { + let preg = PReg::from(reg); + match preg.class() { + RegClass::Int => match preg.hw_enc() as u8 { + ENC_RAX => "%rax", + ENC_RBX => "%rbx", + ENC_RCX => "%rcx", + ENC_RDX => "%rdx", + ENC_RSI => "%rsi", + ENC_RDI => "%rdi", + ENC_RBP => "%rbp", + ENC_RSP => "%rsp", + ENC_R8 => "%r8", + ENC_R9 => "%r9", + ENC_R10 => "%r10", + ENC_R11 => "%r11", + ENC_R12 => "%r12", + ENC_R13 => "%r13", + ENC_R14 => "%r14", + ENC_R15 => "%r15", + _ => panic!("Invalid PReg: {:?}", preg), + }, + RegClass::Float => match preg.hw_enc() { + 0 => "%xmm0", + 1 => "%xmm1", + 2 => "%xmm2", + 3 => "%xmm3", + 4 => "%xmm4", + 5 => "%xmm5", + 6 => "%xmm6", + 7 => "%xmm7", + 8 => "%xmm8", + 9 => "%xmm9", + 10 => "%xmm10", + 11 => "%xmm11", + 12 => "%xmm12", + 13 => "%xmm13", + 14 => "%xmm14", + 15 => "%xmm15", + _ => panic!("Invalid PReg: {:?}", preg), + }, + } +} + +pub fn show_reg(reg: Reg) -> String { + if let Some(rreg) = reg.to_real_reg() { + realreg_name(rreg).to_string() + } else { + format!("%{:?}", reg) } } /// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some /// smaller size (4, 2 or 1 bytes). -pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { - let mut s = reg.show_rru(mb_rru); +pub fn show_ireg_sized(reg: Reg, size: u8) -> String { + let mut s = show_reg(reg); - if reg.get_class() != RegClass::I64 || size == 8 { + if reg.class() != RegClass::Int || size == 8 { // We can't do any better. return s; } @@ -302,3 +313,15 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> s } + +// N.B.: this is not an `impl PrettyPrint for Reg` because it is +// specific to x64; other backends have analogous functions. The +// disambiguation happens statically by virtue of higher-level, +// x64-specific, types calling the right `pretty_print_reg`. (In other +// words, we can't pretty-print a `Reg` all by itself in a build that +// may have multiple backends; but we can pretty-print one as part of +// an x64 Inst or x64 RegMemImm.) +pub fn pretty_print_reg(reg: Reg, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { + let reg = allocs.next(reg); + show_ireg_sized(reg, size) +} diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 68575a18ed..c28ea3b623 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -1,8 +1,8 @@ //! 
Unwind information for System V ABI (x86-64). use crate::isa::unwind::systemv::RegisterMappingError; +use crate::machinst::{Reg, RegClass}; use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64}; -use regalloc::{Reg, RegClass}; /// Creates a new x86-64 common information entry (CIE). pub fn create_cie() -> CommonInformationEntry { @@ -69,14 +69,13 @@ pub fn map_reg(reg: Reg) -> Result { X86_64::XMM15, ]; - match reg.get_class() { - RegClass::I64 => { + match reg.class() { + RegClass::Int => { // x86 GP registers have a weird mapping to DWARF registers, so we use a // lookup table. - Ok(X86_GP_REG_MAP[reg.get_hw_encoding() as usize]) + Ok(X86_GP_REG_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]) } - RegClass::V128 => Ok(X86_XMM_REG_MAP[reg.get_hw_encoding() as usize]), - _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")), + RegClass::Float => Ok(X86_XMM_REG_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), } } diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs b/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs index ffffc5fef5..dc4e6e2b60 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs @@ -1,16 +1,15 @@ //! Unwind information for Windows x64 ABI. -use regalloc::{Reg, RegClass}; +use crate::machinst::{Reg, RegClass}; pub(crate) struct RegisterMapper; impl crate::isa::unwind::winx64::RegisterMapper for RegisterMapper { fn map(reg: Reg) -> crate::isa::unwind::winx64::MappedRegister { use crate::isa::unwind::winx64::MappedRegister; - match reg.get_class() { - RegClass::I64 => MappedRegister::Int(reg.get_hw_encoding()), - RegClass::V128 => MappedRegister::Xmm(reg.get_hw_encoding()), - _ => unreachable!(), + match reg.class() { + RegClass::Int => MappedRegister::Int(reg.to_real_reg().unwrap().hw_enc()), + RegClass::Float => MappedRegister::Xmm(reg.to_real_reg().unwrap().hw_enc()), } } } diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 80c99cc70f..42c5e70c49 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1432,12 +1432,12 @@ ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trap code)) - (safepoint (x64_ud2 code))) + (side_effect (x64_ud2 code))) ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trap code)) - (safepoint (x64_ud2 code))) + (side_effect (x64_ud2 code))) ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 48ac3173e5..8af0801732 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -20,7 +20,6 @@ use crate::settings::{Flags, TlsModel}; use alloc::boxed::Box; use alloc::vec::Vec; use log::trace; -use regalloc::{Reg, RegClass, Writable}; use smallvec::SmallVec; use std::convert::TryFrom; use target_lexicon::Triple; @@ -1005,7 +1004,7 @@ fn lower_insn_to_regs>( // simply use the flags here. 
let cc = CC::from_intcc(cond_code); - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc }); + ctx.emit(Inst::TrapIf { trap_code, cc }); } else if op == Opcode::Trapif { let cond_code = ctx.data(insn).cond_code().unwrap(); @@ -1014,7 +1013,7 @@ fn lower_insn_to_regs>( let cond_code = emit_cmp(ctx, ifcmp, cond_code); let cc = CC::from_intcc(cond_code); - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc }); + ctx.emit(Inst::TrapIf { trap_code, cc }); } else { let cond_code = ctx.data(insn).fp_cond_code().unwrap(); @@ -1022,9 +1021,7 @@ fn lower_insn_to_regs>( let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) { - FcmpCondResult::Condition(cc) => { - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc }) - } + FcmpCondResult::Condition(cc) => ctx.emit(Inst::TrapIf { trap_code, cc }), FcmpCondResult::AndConditions(cc1, cc2) => { // A bit unfortunate, but materialize the flags in their own register, and // check against this. @@ -1038,14 +1035,14 @@ fn lower_insn_to_regs>( RegMemImm::reg(tmp.to_reg()), tmp2, )); - ctx.emit_safepoint(Inst::TrapIf { + ctx.emit(Inst::TrapIf { trap_code, cc: CC::NZ, }); } FcmpCondResult::OrConditions(cc1, cc2) => { - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc1 }); - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc2 }); + ctx.emit(Inst::TrapIf { trap_code, cc: cc1 }); + ctx.emit(Inst::TrapIf { trap_code, cc: cc2 }); } FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), }; @@ -2917,7 +2914,7 @@ fn lower_insn_to_regs>( let src_ty = ctx.input_ty(insn, 0); debug_assert!(src_ty.is_vector() && src_ty.bits() == 128); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); // The Intel specification allows using both 32-bit and 64-bit GPRs as destination for // the "move mask" instructions. This is controlled by the REX.R bit: "In 64-bit mode, diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 28df19f896..c8032ecde9 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -2,23 +2,23 @@ // Pull in the ISLE generated code. pub(crate) mod generated_code; +use crate::machinst::{Reg, Writable}; use generated_code::MInst; -use regalloc::Writable; // Types that the generated ISLE code uses via `use super::*`. 
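// (`Reg` and `Writable` now come from `crate::machinst` rather than the
// external `regalloc` crate or `super`; see the adjusted imports below.)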
-use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, Reg}; +use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode}; use crate::{ ir::{ condcodes::{FloatCC, IntCC}, immediates::*, types::*, - Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueLabel, ValueList, + Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList, }, isa::{ settings::Flags, unwind::UnwindInst, x64::{ - inst::{args::*, regs, x64_map_regs}, + inst::{args::*, regs}, settings::Flags as IsaFlags, }, }, @@ -45,15 +45,9 @@ pub(crate) fn lower( where C: LowerCtx, { - lower_common( - lower_ctx, - flags, - isa_flags, - outputs, - inst, - |cx, insn| generated_code::constructor_lower(cx, insn), - x64_map_regs, - ) + lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| { + generated_code::constructor_lower(cx, insn) + }) } impl generated_code::Context for IsleContext<'_, C, Flags, IsaFlags, 6> @@ -269,17 +263,7 @@ where } fn emit(&mut self, inst: &MInst) -> Unit { - for inst in inst.clone().mov_mitosis() { - self.emitted_insts.push((inst, false)); - } - } - - fn emit_safepoint(&mut self, inst: &MInst) -> Unit { - use crate::machinst::MachInst; - for inst in inst.clone().mov_mitosis() { - let is_safepoint = !inst.is_move().is_some(); - self.emitted_insts.push((inst, is_safepoint)); - } + self.lower_ctx.emit(inst.clone()); } #[inline] diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index ad3cfca1f8..245b41e5c2 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 443b34b797fc8ace -src/prelude.isle c0751050a11e2686 -src/isa/x64/inst.isle 1a4206dba9fcf9d8 -src/isa/x64/lower.isle 7e839e6b667bfe77 +src/prelude.isle afd037c4d91c875c +src/isa/x64/inst.isle f3163ebadf210bb0 +src/isa/x64/lower.isle fd63f3801d58180f diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index 81380bb8ab..b2f9e13902 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -79,7 +79,6 @@ pub trait Context { fn def_inst(&mut self, arg0: Value) -> Option; fn offset32_to_u32(&mut self, arg0: Offset32) -> u32; fn emit(&mut self, arg0: &MInst) -> Unit; - fn emit_safepoint(&mut self, arg0: &MInst) -> Unit; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode; @@ -162,13 +161,13 @@ pub trait Context { fn popcount_low_mask(&mut self) -> VCodeConstant; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 402. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 427. +/// Internal type ProducesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -176,7 +175,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 438. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 429. 
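/// A `ConsumesFlags` is paired with a `ProducesFlags` by the
/// `with_flags` constructors below, which emit the producer and the
/// consumer(s) back to back so nothing can clobber the flags register
/// in between. A rough sketch in ISLE terms, for a 128-bit add:
///
/// ```text
/// (with_flags (x64_add_with_flags_paired $I64 a_lo b_lo)
///             (x64_adc_paired $I64 a_hi b_hi))
/// ```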
#[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -234,7 +233,8 @@ pub enum MInst { size: OperandSize, signed: bool, divisor: GprMem, - dividend: Gpr, + dividend_lo: Gpr, + dividend_hi: Gpr, dst_quotient: WritableGpr, dst_remainder: WritableGpr, }, @@ -249,7 +249,8 @@ pub enum MInst { CheckedDivOrRemSeq { kind: DivOrRemKind, size: OperandSize, - dividend: Gpr, + dividend_lo: Gpr, + dividend_hi: Gpr, divisor: WritableGpr, dst_quotient: WritableGpr, dst_remainder: WritableGpr, @@ -437,7 +438,9 @@ pub enum MInst { defs: VecWritableReg, opcode: Opcode, }, - Ret, + Ret { + rets: VecReg, + }, EpiloguePlaceholder, JmpKnown { dst: MachLabel, @@ -505,16 +508,15 @@ pub enum MInst { MachOTlsGetAddr { symbol: ExternalName, }, - ValueLabelMarker { - reg: Reg, - label: ValueLabel, - }, Unwind { inst: UnwindInst, }, + DummyUse { + reg: Reg, + }, } -/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 1201. +/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 1202. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ExtendKind { Sign, @@ -568,7 +570,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 410. + // Rule at src/prelude.isle line 407. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -576,24 +578,6 @@ pub fn constructor_side_effect( return None; } -// Generated as internal constructor for term safepoint. -pub fn constructor_safepoint( - ctx: &mut C, - arg0: &SideEffectNoResult, -) -> Option { - let pattern0_0 = arg0; - if let &SideEffectNoResult::Inst { - inst: ref pattern1_0, - } = pattern0_0 - { - // Rule at src/prelude.isle line 416. - let expr0_0 = C::emit_safepoint(ctx, pattern1_0); - let expr1_0 = C::output_none(ctx); - return Some(expr1_0); - } - return None; -} - // Generated as internal constructor for term produces_flags_get_reg. pub fn constructor_produces_flags_get_reg( ctx: &mut C, @@ -605,7 +589,7 @@ pub fn constructor_produces_flags_get_reg( result: pattern1_1, } = pattern0_0 { - // Rule at src/prelude.isle line 454. + // Rule at src/prelude.isle line 445. return Some(pattern1_1); } return None; @@ -622,7 +606,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 459. + // Rule at src/prelude.isle line 450. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -632,7 +616,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 461. + // Rule at src/prelude.isle line 452. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -661,7 +645,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 468. + // Rule at src/prelude.isle line 459. let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -691,7 +675,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 493. + // Rule at src/prelude.isle line 484. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -702,7 +686,7 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 499. 
+ // Rule at src/prelude.isle line 490. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -715,7 +699,7 @@ pub fn constructor_with_flags( inst4: ref pattern3_3, result: pattern3_4, } => { - // Rule at src/prelude.isle line 511. + // Rule at src/prelude.isle line 502. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -736,7 +720,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 487. + // Rule at src/prelude.isle line 478. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -756,7 +740,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 528. + // Rule at src/prelude.isle line 519. let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -768,22 +752,22 @@ pub fn constructor_operand_size_bits(ctx: &mut C, arg0: &OperandSize let pattern0_0 = arg0; match pattern0_0 { &OperandSize::Size8 => { - // Rule at src/isa/x64/inst.isle line 510. + // Rule at src/isa/x64/inst.isle line 511. let expr0_0: u16 = 8; return Some(expr0_0); } &OperandSize::Size16 => { - // Rule at src/isa/x64/inst.isle line 511. + // Rule at src/isa/x64/inst.isle line 512. let expr0_0: u16 = 16; return Some(expr0_0); } &OperandSize::Size32 => { - // Rule at src/isa/x64/inst.isle line 512. + // Rule at src/isa/x64/inst.isle line 513. let expr0_0: u16 = 32; return Some(expr0_0); } &OperandSize::Size64 => { - // Rule at src/isa/x64/inst.isle line 513. + // Rule at src/isa/x64/inst.isle line 514. let expr0_0: u16 = 64; return Some(expr0_0); } @@ -802,7 +786,7 @@ pub fn constructor_amode_imm_reg_flags( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 770. + // Rule at src/isa/x64/inst.isle line 771. let expr0_0 = C::amode_imm_reg(ctx, pattern0_0, pattern1_0); let expr1_0 = C::amode_with_flags(ctx, &expr0_0, pattern2_0); return Some(expr1_0); @@ -822,7 +806,7 @@ pub fn constructor_amode_imm_reg_reg_shift_flags( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 777. + // Rule at src/isa/x64/inst.isle line 778. let expr0_0 = C::amode_imm_reg_reg_shift(ctx, pattern0_0, pattern1_0, pattern2_0, pattern3_0); let expr1_0 = C::amode_with_flags(ctx, &expr0_0, pattern4_0); return Some(expr1_0); @@ -870,7 +854,7 @@ pub fn constructor_to_amode( pattern12_0, pattern13_0, ) { - // Rule at src/isa/x64/inst.isle line 824. + // Rule at src/isa/x64/inst.isle line 825. let expr0_0 = constructor_put_in_gpr(ctx, pattern6_1)?; let expr1_0 = constructor_amode_imm_reg_flags( ctx, @@ -894,7 +878,7 @@ pub fn constructor_to_amode( if let Some(pattern12_0) = C::const_shift_lt_eq_3(ctx, pattern11_1) { let pattern13_0 = arg2; - // Rule at src/isa/x64/inst.isle line 816. + // Rule at src/isa/x64/inst.isle line 817. let expr0_0 = C::offset32_to_u32(ctx, pattern13_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern6_1)?; let expr2_0 = constructor_put_in_gpr(ctx, pattern11_0)?; @@ -946,7 +930,7 @@ pub fn constructor_to_amode( pattern19_0, ) { - // Rule at src/isa/x64/inst.isle line 829. + // Rule at src/isa/x64/inst.isle line 830. 
let expr0_0 = constructor_put_in_gpr( ctx, pattern6_1, )?; @@ -992,7 +976,7 @@ pub fn constructor_to_amode( pattern12_0, pattern13_0, ) { - // Rule at src/isa/x64/inst.isle line 826. + // Rule at src/isa/x64/inst.isle line 827. let expr0_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr1_0 = constructor_amode_imm_reg_flags( ctx, @@ -1016,7 +1000,7 @@ pub fn constructor_to_amode( if let Some(pattern12_0) = C::const_shift_lt_eq_3(ctx, pattern11_1) { let pattern13_0 = arg2; - // Rule at src/isa/x64/inst.isle line 818. + // Rule at src/isa/x64/inst.isle line 819. let expr0_0 = C::offset32_to_u32(ctx, pattern13_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr2_0 = constructor_put_in_gpr(ctx, pattern11_0)?; @@ -1068,7 +1052,7 @@ pub fn constructor_to_amode( pattern19_0, ) { - // Rule at src/isa/x64/inst.isle line 831. + // Rule at src/isa/x64/inst.isle line 832. let expr0_0 = constructor_put_in_gpr( ctx, pattern6_0, )?; @@ -1091,7 +1075,7 @@ pub fn constructor_to_amode( } } let pattern7_0 = arg2; - // Rule at src/isa/x64/inst.isle line 834. + // Rule at src/isa/x64/inst.isle line 835. let expr0_0 = C::offset32_to_u32(ctx, pattern7_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr2_0 = constructor_put_in_gpr(ctx, pattern6_1)?; @@ -1104,7 +1088,7 @@ pub fn constructor_to_amode( } } let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 837. + // Rule at src/isa/x64/inst.isle line 838. let expr0_0 = C::offset32_to_u32(ctx, pattern2_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern1_0)?; let expr2_0 = constructor_amode_imm_reg_flags(ctx, expr0_0, expr1_0, pattern0_0)?; @@ -1114,7 +1098,7 @@ pub fn constructor_to_amode( // Generated as internal constructor for term reg_to_gpr_mem_imm. pub fn constructor_reg_to_gpr_mem_imm(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1031. + // Rule at src/isa/x64/inst.isle line 1032. let expr0_0 = C::gpr_new(ctx, pattern0_0); let expr1_0 = C::gpr_to_gpr_mem_imm(ctx, expr0_0); return Some(expr1_0); @@ -1123,7 +1107,7 @@ pub fn constructor_reg_to_gpr_mem_imm(ctx: &mut C, arg0: Reg) -> Opt // Generated as internal constructor for term put_in_gpr. pub fn constructor_put_in_gpr(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1038. + // Rule at src/isa/x64/inst.isle line 1039. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -1132,7 +1116,7 @@ pub fn constructor_put_in_gpr(ctx: &mut C, arg0: Value) -> Option(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1045. + // Rule at src/isa/x64/inst.isle line 1046. let expr0_0 = C::put_in_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_gpr_mem(ctx, &expr0_0); return Some(expr1_0); @@ -1141,7 +1125,7 @@ pub fn constructor_put_in_gpr_mem(ctx: &mut C, arg0: Value) -> Optio // Generated as internal constructor for term put_in_gpr_mem_imm. pub fn constructor_put_in_gpr_mem_imm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1052. + // Rule at src/isa/x64/inst.isle line 1053. let expr0_0 = C::put_in_reg_mem_imm(ctx, pattern0_0); let expr1_0 = C::gpr_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -1150,7 +1134,7 @@ pub fn constructor_put_in_gpr_mem_imm(ctx: &mut C, arg0: Value) -> O // Generated as internal constructor for term put_in_xmm. 
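// `put_in_xmm` mirrors `put_in_gpr`: put the value in a register, then
// wrap it in the `Xmm` newtype for class-checked uses.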
pub fn constructor_put_in_xmm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1059. + // Rule at src/isa/x64/inst.isle line 1060. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0 = C::xmm_new(ctx, expr0_0); return Some(expr1_0); @@ -1159,7 +1143,7 @@ pub fn constructor_put_in_xmm(ctx: &mut C, arg0: Value) -> Option(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1066. + // Rule at src/isa/x64/inst.isle line 1067. let expr0_0 = C::put_in_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_xmm_mem(ctx, &expr0_0); return Some(expr1_0); @@ -1168,7 +1152,7 @@ pub fn constructor_put_in_xmm_mem(ctx: &mut C, arg0: Value) -> Optio // Generated as internal constructor for term put_in_xmm_mem_imm. pub fn constructor_put_in_xmm_mem_imm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1073. + // Rule at src/isa/x64/inst.isle line 1074. let expr0_0 = C::put_in_reg_mem_imm(ctx, pattern0_0); let expr1_0 = C::xmm_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -1177,7 +1161,7 @@ pub fn constructor_put_in_xmm_mem_imm(ctx: &mut C, arg0: Value) -> O // Generated as internal constructor for term output_gpr. pub fn constructor_output_gpr(ctx: &mut C, arg0: Gpr) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1078. + // Rule at src/isa/x64/inst.isle line 1079. let expr0_0 = C::gpr_to_reg(ctx, pattern0_0); let expr1_0 = constructor_output_reg(ctx, expr0_0)?; return Some(expr1_0); @@ -1187,7 +1171,7 @@ pub fn constructor_output_gpr(ctx: &mut C, arg0: Gpr) -> Option(ctx: &mut C, arg0: Gpr, arg1: Gpr) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1083. + // Rule at src/isa/x64/inst.isle line 1084. let expr0_0 = C::gpr_to_reg(ctx, pattern0_0); let expr1_0 = C::gpr_to_reg(ctx, pattern1_0); let expr2_0 = C::value_regs(ctx, expr0_0, expr1_0); @@ -1197,7 +1181,7 @@ pub fn constructor_value_gprs(ctx: &mut C, arg0: Gpr, arg1: Gpr) -> // Generated as internal constructor for term output_xmm. pub fn constructor_output_xmm(ctx: &mut C, arg0: Xmm) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1088. + // Rule at src/isa/x64/inst.isle line 1089. let expr0_0 = C::xmm_to_reg(ctx, pattern0_0); let expr1_0 = constructor_output_reg(ctx, expr0_0)?; return Some(expr1_0); @@ -1211,7 +1195,7 @@ pub fn constructor_value_regs_get_gpr( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1095. + // Rule at src/isa/x64/inst.isle line 1096. let expr0_0 = C::value_regs_get(ctx, pattern0_0, pattern1_0); let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -1220,7 +1204,7 @@ pub fn constructor_value_regs_get_gpr( // Generated as internal constructor for term lo_gpr. pub fn constructor_lo_gpr(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1108. + // Rule at src/isa/x64/inst.isle line 1109. let expr0_0 = constructor_lo_reg(ctx, pattern0_0)?; let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -1232,7 +1216,7 @@ pub fn constructor_sink_load_to_gpr_mem_imm( arg0: &SinkableLoad, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1191. + // Rule at src/isa/x64/inst.isle line 1192. 
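    // Sinking a load folds the matched load into this instruction's
    // memory operand, so no separate load instruction is emitted.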
let expr0_0 = C::sink_load(ctx, pattern0_0); let expr1_0 = C::gpr_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -1250,12 +1234,12 @@ pub fn constructor_extend_to_gpr( let pattern2_0 = arg1; if pattern2_0 == pattern1_0 { let pattern4_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1213. + // Rule at src/isa/x64/inst.isle line 1214. let expr0_0 = constructor_put_in_gpr(ctx, pattern0_0)?; return Some(expr0_0); } let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1216. + // Rule at src/isa/x64/inst.isle line 1217. let expr0_0 = C::ty_bits_u16(ctx, pattern1_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern2_0); let expr2_0 = constructor_operand_size_bits(ctx, &expr1_0)?; @@ -1279,7 +1263,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1236. + // Rule at src/isa/x64/inst.isle line 1237. let expr0_0 = constructor_x64_movsx(ctx, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -1287,7 +1271,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1232. + // Rule at src/isa/x64/inst.isle line 1233. let expr0_0 = constructor_x64_movzx(ctx, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -1300,17 +1284,17 @@ pub fn constructor_extend( pub fn constructor_sse_xor_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1243. + // Rule at src/isa/x64/inst.isle line 1244. let expr0_0 = SseOpcode::Xorps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1244. + // Rule at src/isa/x64/inst.isle line 1245. let expr0_0 = SseOpcode::Xorpd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 1245. + // Rule at src/isa/x64/inst.isle line 1246. let expr0_0 = SseOpcode::Pxor; return Some(expr0_0); } @@ -1327,7 +1311,7 @@ pub fn constructor_sse_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1249. + // Rule at src/isa/x64/inst.isle line 1250. let expr0_0 = constructor_sse_xor_op(ctx, pattern0_0)?; let expr1_0 = constructor_xmm_rm_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1337,40 +1321,40 @@ pub fn constructor_sse_xor( pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1258. + // Rule at src/isa/x64/inst.isle line 1259. let expr0_0 = SseOpcode::Cmpps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1259. + // Rule at src/isa/x64/inst.isle line 1260. let expr0_0 = SseOpcode::Cmppd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { if pattern1_0 == 8 { if pattern1_1 == 16 { - // Rule at src/isa/x64/inst.isle line 1254. + // Rule at src/isa/x64/inst.isle line 1255. let expr0_0 = SseOpcode::Pcmpeqb; return Some(expr0_0); } } if pattern1_0 == 16 { if pattern1_1 == 8 { - // Rule at src/isa/x64/inst.isle line 1255. + // Rule at src/isa/x64/inst.isle line 1256. let expr0_0 = SseOpcode::Pcmpeqw; return Some(expr0_0); } } if pattern1_0 == 32 { if pattern1_1 == 4 { - // Rule at src/isa/x64/inst.isle line 1256. + // Rule at src/isa/x64/inst.isle line 1257. 
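                // Four 32-bit lanes: PCMPEQD.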
let expr0_0 = SseOpcode::Pcmpeqd; return Some(expr0_0); } } if pattern1_0 == 64 { if pattern1_1 == 2 { - // Rule at src/isa/x64/inst.isle line 1257. + // Rule at src/isa/x64/inst.isle line 1258. let expr0_0 = SseOpcode::Pcmpeqq; return Some(expr0_0); } @@ -1382,7 +1366,7 @@ pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1273. + // Rule at src/isa/x64/inst.isle line 1274. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0: Type = I32X4; let expr2_0 = constructor_sse_cmp_op(ctx, expr1_0)?; @@ -1407,7 +1391,7 @@ pub fn constructor_make_i64x2_from_lanes( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1283. + // Rule at src/isa/x64/inst.isle line 1284. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0); let expr2_0 = MInst::XmmUninitializedValue { dst: expr0_0 }; @@ -1449,12 +1433,12 @@ pub fn constructor_mov_rmi_to_xmm(ctx: &mut C, arg0: &RegMemImm) -> let pattern0_0 = arg0; match pattern0_0 { &RegMemImm::Imm { simm32: pattern1_0 } => { - // Rule at src/isa/x64/inst.isle line 1304. + // Rule at src/isa/x64/inst.isle line 1305. let expr0_0 = C::xmm_mem_imm_new(ctx, pattern0_0); return Some(expr0_0); } &RegMemImm::Reg { reg: pattern1_0 } => { - // Rule at src/isa/x64/inst.isle line 1305. + // Rule at src/isa/x64/inst.isle line 1306. let expr0_0 = SseOpcode::Movd; let expr1_0 = C::reg_to_gpr_mem(ctx, pattern1_0); let expr2_0 = OperandSize::Size32; @@ -1465,7 +1449,7 @@ pub fn constructor_mov_rmi_to_xmm(ctx: &mut C, arg0: &RegMemImm) -> &RegMemImm::Mem { addr: ref pattern1_0, } => { - // Rule at src/isa/x64/inst.isle line 1303. + // Rule at src/isa/x64/inst.isle line 1304. let expr0_0 = C::xmm_mem_imm_new(ctx, pattern0_0); return Some(expr0_0); } @@ -1485,7 +1469,7 @@ pub fn constructor_x64_load( if pattern0_0 == I64 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1319. + // Rule at src/isa/x64/inst.isle line 1320. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::Mov64MR { src: pattern2_0.clone(), @@ -1498,7 +1482,7 @@ pub fn constructor_x64_load( if pattern0_0 == F32 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1324. + // Rule at src/isa/x64/inst.isle line 1325. let expr0_0 = SseOpcode::Movss; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1508,7 +1492,7 @@ pub fn constructor_x64_load( if pattern0_0 == F64 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1328. + // Rule at src/isa/x64/inst.isle line 1329. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1518,7 +1502,7 @@ pub fn constructor_x64_load( if pattern0_0 == F32X4 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1332. + // Rule at src/isa/x64/inst.isle line 1333. let expr0_0 = SseOpcode::Movups; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1528,7 +1512,7 @@ pub fn constructor_x64_load( if pattern0_0 == F64X2 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1336. 
+ // Rule at src/isa/x64/inst.isle line 1337. let expr0_0 = SseOpcode::Movupd; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1538,7 +1522,7 @@ pub fn constructor_x64_load( if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1340. + // Rule at src/isa/x64/inst.isle line 1341. let expr0_0 = SseOpcode::Movdqu; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1549,7 +1533,7 @@ pub fn constructor_x64_load( let pattern2_0 = arg1; let pattern3_0 = arg2; if let &ExtKind::SignExtend = pattern3_0 { - // Rule at src/isa/x64/inst.isle line 1315. + // Rule at src/isa/x64/inst.isle line 1316. let expr0_0 = C::ty_bytes(ctx, pattern1_0); let expr1_0: u16 = 8; let expr2_0 = C::ext_mode(ctx, expr0_0, expr1_0); @@ -1565,7 +1549,7 @@ pub fn constructor_x64_load( // Generated as internal constructor for term x64_mov. pub fn constructor_x64_mov(ctx: &mut C, arg0: &Amode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1345. + // Rule at src/isa/x64/inst.isle line 1346. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::amode_to_synthetic_amode(ctx, pattern0_0); let expr2_0 = MInst::Mov64MR { @@ -1585,7 +1569,7 @@ pub fn constructor_x64_movzx( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1351. + // Rule at src/isa/x64/inst.isle line 1352. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::MovzxRmR { ext_mode: pattern0_0.clone(), @@ -1605,7 +1589,7 @@ pub fn constructor_x64_movsx( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1357. + // Rule at src/isa/x64/inst.isle line 1358. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::MovsxRmR { ext_mode: pattern0_0.clone(), @@ -1620,7 +1604,7 @@ pub fn constructor_x64_movsx( // Generated as internal constructor for term x64_movss_load. pub fn constructor_x64_movss_load(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1363. + // Rule at src/isa/x64/inst.isle line 1364. let expr0_0 = SseOpcode::Movss; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1629,7 +1613,7 @@ pub fn constructor_x64_movss_load(ctx: &mut C, arg0: &XmmMem) -> Opt // Generated as internal constructor for term x64_movsd_load. pub fn constructor_x64_movsd_load(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1367. + // Rule at src/isa/x64/inst.isle line 1368. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1638,7 +1622,7 @@ pub fn constructor_x64_movsd_load(ctx: &mut C, arg0: &XmmMem) -> Opt // Generated as internal constructor for term x64_movups. pub fn constructor_x64_movups(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1371. + // Rule at src/isa/x64/inst.isle line 1372. let expr0_0 = SseOpcode::Movups; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1647,7 +1631,7 @@ pub fn constructor_x64_movups(ctx: &mut C, arg0: &XmmMem) -> Option< // Generated as internal constructor for term x64_movupd. 
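// MOVUPD: unaligned 128-bit load of two packed f64 lanes.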
pub fn constructor_x64_movupd(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1375. + // Rule at src/isa/x64/inst.isle line 1376. let expr0_0 = SseOpcode::Movupd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1656,7 +1640,7 @@ pub fn constructor_x64_movupd(ctx: &mut C, arg0: &XmmMem) -> Option< // Generated as internal constructor for term x64_movdqu. pub fn constructor_x64_movdqu(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1379. + // Rule at src/isa/x64/inst.isle line 1380. let expr0_0 = SseOpcode::Movdqu; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1665,7 +1649,7 @@ pub fn constructor_x64_movdqu(ctx: &mut C, arg0: &XmmMem) -> Option< // Generated as internal constructor for term x64_pmovsxbw. pub fn constructor_x64_pmovsxbw(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1383. + // Rule at src/isa/x64/inst.isle line 1384. let expr0_0 = SseOpcode::Pmovsxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1674,7 +1658,7 @@ pub fn constructor_x64_pmovsxbw(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovzxbw. pub fn constructor_x64_pmovzxbw(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1387. + // Rule at src/isa/x64/inst.isle line 1388. let expr0_0 = SseOpcode::Pmovzxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1683,7 +1667,7 @@ pub fn constructor_x64_pmovzxbw(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovsxwd. pub fn constructor_x64_pmovsxwd(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1391. + // Rule at src/isa/x64/inst.isle line 1392. let expr0_0 = SseOpcode::Pmovsxwd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1692,7 +1676,7 @@ pub fn constructor_x64_pmovsxwd(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovzxwd. pub fn constructor_x64_pmovzxwd(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1395. + // Rule at src/isa/x64/inst.isle line 1396. let expr0_0 = SseOpcode::Pmovzxwd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1701,7 +1685,7 @@ pub fn constructor_x64_pmovzxwd(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovsxdq. pub fn constructor_x64_pmovsxdq(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1399. + // Rule at src/isa/x64/inst.isle line 1400. let expr0_0 = SseOpcode::Pmovsxdq; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1710,7 +1694,7 @@ pub fn constructor_x64_pmovsxdq(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovzxdq. pub fn constructor_x64_pmovzxdq(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1403. + // Rule at src/isa/x64/inst.isle line 1404. 
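    // PMOVZXDQ zero-extends two 32-bit lanes to two 64-bit lanes.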
let expr0_0 = SseOpcode::Pmovzxdq; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1724,7 +1708,7 @@ pub fn constructor_x64_xmm_load_const( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1408. + // Rule at src/isa/x64/inst.isle line 1409. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0); let expr2_0 = MInst::XmmLoadConst { @@ -1749,7 +1733,7 @@ pub fn constructor_alu_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1421. + // Rule at src/isa/x64/inst.isle line 1422. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::AluRmiR { @@ -1774,7 +1758,7 @@ pub fn constructor_x64_add( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1429. + // Rule at src/isa/x64/inst.isle line 1430. let expr0_0 = AluRmiROpcode::Add; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1790,7 +1774,7 @@ pub fn constructor_x64_add_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1437. + // Rule at src/isa/x64/inst.isle line 1438. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Add; @@ -1819,7 +1803,7 @@ pub fn constructor_x64_adc_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1449. + // Rule at src/isa/x64/inst.isle line 1450. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Adc; @@ -1848,7 +1832,7 @@ pub fn constructor_x64_sub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1461. + // Rule at src/isa/x64/inst.isle line 1462. let expr0_0 = AluRmiROpcode::Sub; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1864,7 +1848,7 @@ pub fn constructor_x64_sub_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1469. + // Rule at src/isa/x64/inst.isle line 1470. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sub; @@ -1893,7 +1877,7 @@ pub fn constructor_x64_sbb_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1481. + // Rule at src/isa/x64/inst.isle line 1482. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sbb; @@ -1922,7 +1906,7 @@ pub fn constructor_x64_mul( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1493. + // Rule at src/isa/x64/inst.isle line 1494. let expr0_0 = AluRmiROpcode::Mul; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1938,7 +1922,7 @@ pub fn constructor_x64_and( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1501. + // Rule at src/isa/x64/inst.isle line 1502. 
let expr0_0 = AluRmiROpcode::And; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1954,7 +1938,7 @@ pub fn constructor_x64_and_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1508. + // Rule at src/isa/x64/inst.isle line 1509. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::And; @@ -1979,7 +1963,7 @@ pub fn constructor_x64_or( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1519. + // Rule at src/isa/x64/inst.isle line 1520. let expr0_0 = AluRmiROpcode::Or; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1995,7 +1979,7 @@ pub fn constructor_x64_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1527. + // Rule at src/isa/x64/inst.isle line 1528. let expr0_0 = AluRmiROpcode::Xor; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2007,7 +1991,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == I64 { let pattern2_0 = arg1; if let Some(pattern3_0) = C::nonzero_u64_fits_in_u32(ctx, pattern2_0) { - // Rule at src/isa/x64/inst.isle line 1567. + // Rule at src/isa/x64/inst.isle line 1568. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = OperandSize::Size32; let expr2_0 = MInst::Imm { @@ -2023,7 +2007,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F32 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1596. + // Rule at src/isa/x64/inst.isle line 1597. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = SseOpcode::Xorps; @@ -2038,7 +2022,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::xmm_to_reg(ctx, expr1_0); return Some(expr6_0); } - // Rule at src/isa/x64/inst.isle line 1544. + // Rule at src/isa/x64/inst.isle line 1545. let expr0_0 = SseOpcode::Movd; let expr1_0: Type = I32; let expr2_0 = constructor_imm(ctx, expr1_0, pattern2_0)?; @@ -2051,7 +2035,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F64 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1608. + // Rule at src/isa/x64/inst.isle line 1609. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = SseOpcode::Xorpd; @@ -2066,7 +2050,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::xmm_to_reg(ctx, expr1_0); return Some(expr6_0); } - // Rule at src/isa/x64/inst.isle line 1550. + // Rule at src/isa/x64/inst.isle line 1551. let expr0_0 = SseOpcode::Movq; let expr1_0: Type = I64; let expr2_0 = constructor_imm(ctx, expr1_0, pattern2_0)?; @@ -2079,7 +2063,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1586. + // Rule at src/isa/x64/inst.isle line 1587. 
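            // A zero vector is cheapest as an xor of a fresh register
            // with itself; no constant-pool load is needed.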
let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = constructor_sse_xor_op(ctx, pattern0_0)?; @@ -2098,7 +2082,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if let Some(pattern1_0) = C::fits_in_64(ctx, pattern0_0) { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1573. + // Rule at src/isa/x64/inst.isle line 1574. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::writable_gpr_to_gpr(ctx, expr0_0); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern1_0); @@ -2115,7 +2099,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr7_0 = C::gpr_to_reg(ctx, expr1_0); return Some(expr7_0); } - // Rule at src/isa/x64/inst.isle line 1537. + // Rule at src/isa/x64/inst.isle line 1538. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern1_0); let expr2_0 = MInst::Imm { @@ -2134,7 +2118,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option pub fn constructor_imm_i64(ctx: &mut C, arg0: Type, arg1: i64) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1559. + // Rule at src/isa/x64/inst.isle line 1560. let expr0_0 = C::i64_as_u64(ctx, pattern1_0); let expr1_0 = constructor_imm(ctx, pattern0_0, expr0_0)?; return Some(expr1_0); @@ -2152,7 +2136,7 @@ pub fn constructor_shift_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1621. + // Rule at src/isa/x64/inst.isle line 1622. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::raw_operand_size_of_type(ctx, pattern0_0); let expr2_0 = MInst::ShiftR { @@ -2177,7 +2161,7 @@ pub fn constructor_x64_rotl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1631. + // Rule at src/isa/x64/inst.isle line 1632. let expr0_0 = ShiftKind::RotateLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2193,7 +2177,7 @@ pub fn constructor_x64_rotr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1636. + // Rule at src/isa/x64/inst.isle line 1637. let expr0_0 = ShiftKind::RotateRight; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2209,7 +2193,7 @@ pub fn constructor_x64_shl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1641. + // Rule at src/isa/x64/inst.isle line 1642. let expr0_0 = ShiftKind::ShiftLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2225,7 +2209,7 @@ pub fn constructor_x64_shr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1646. + // Rule at src/isa/x64/inst.isle line 1647. let expr0_0 = ShiftKind::ShiftRightLogical; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2241,7 +2225,7 @@ pub fn constructor_x64_sar( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1651. + // Rule at src/isa/x64/inst.isle line 1652. 
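    // SAR: arithmetic (sign-propagating) right shift.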
let expr0_0 = ShiftKind::ShiftRightArithmetic; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2259,7 +2243,7 @@ pub fn constructor_cmp_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1656. + // Rule at src/isa/x64/inst.isle line 1657. let expr0_0 = MInst::CmpRmiR { size: pattern0_0.clone(), opcode: pattern1_0.clone(), @@ -2280,7 +2264,7 @@ pub fn constructor_x64_cmp( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1665. + // Rule at src/isa/x64/inst.isle line 1666. let expr0_0 = CmpOpcode::Cmp; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2296,7 +2280,7 @@ pub fn constructor_x64_cmp_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1670. + // Rule at src/isa/x64/inst.isle line 1671. let expr0_0 = CmpOpcode::Cmp; let expr1_0 = RegMemImm::Imm { simm32: pattern1_0 }; let expr2_0 = C::gpr_mem_imm_new(ctx, &expr1_0); @@ -2314,7 +2298,7 @@ pub fn constructor_xmm_cmp_rm_r( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1675. + // Rule at src/isa/x64/inst.isle line 1676. let expr0_0 = MInst::XmmCmpRmR { op: pattern0_0.clone(), src: pattern1_0.clone(), @@ -2334,7 +2318,7 @@ pub fn constructor_x64_ucomis( let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == F32 { let pattern3_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1681. + // Rule at src/isa/x64/inst.isle line 1682. let expr0_0 = SseOpcode::Ucomiss; let expr1_0 = constructor_put_in_xmm(ctx, pattern0_0)?; let expr2_0 = C::xmm_to_xmm_mem(ctx, expr1_0); @@ -2344,7 +2328,7 @@ pub fn constructor_x64_ucomis( } if pattern1_0 == F64 { let pattern3_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1685. + // Rule at src/isa/x64/inst.isle line 1686. let expr0_0 = SseOpcode::Ucomisd; let expr1_0 = constructor_put_in_xmm(ctx, pattern0_0)?; let expr2_0 = C::xmm_to_xmm_mem(ctx, expr1_0); @@ -2365,7 +2349,7 @@ pub fn constructor_x64_test( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1690. + // Rule at src/isa/x64/inst.isle line 1691. let expr0_0 = CmpOpcode::Test; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2383,7 +2367,7 @@ pub fn constructor_cmove( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1697. + // Rule at src/isa/x64/inst.isle line 1698. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Cmove { @@ -2413,7 +2397,7 @@ pub fn constructor_cmove_xmm( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1705. + // Rule at src/isa/x64/inst.isle line 1706. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::XmmCmove { @@ -2444,7 +2428,7 @@ pub fn constructor_cmove_from_values( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1716. + // Rule at src/isa/x64/inst.isle line 1717. 
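        // For I128, each 64-bit half is selected by its own cmove
        // driven by the same flags.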
let expr0_0 = C::put_in_regs(ctx, pattern3_0); let expr1_0 = C::put_in_regs(ctx, pattern4_0); let expr2_0 = C::temp_writable_gpr(ctx); @@ -2489,7 +2473,7 @@ pub fn constructor_cmove_from_values( let pattern3_0 = arg1; let pattern4_0 = arg2; let pattern5_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1740. + // Rule at src/isa/x64/inst.isle line 1741. let expr0_0 = constructor_put_in_xmm_mem(ctx, pattern4_0)?; let expr1_0 = constructor_put_in_xmm(ctx, pattern5_0)?; let expr2_0 = constructor_cmove_xmm(ctx, pattern2_0, pattern3_0, &expr0_0, expr1_0)?; @@ -2501,7 +2485,7 @@ pub fn constructor_cmove_from_values( let pattern3_0 = arg1; let pattern4_0 = arg2; let pattern5_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1737. + // Rule at src/isa/x64/inst.isle line 1738. let expr0_0 = constructor_put_in_gpr_mem(ctx, pattern4_0)?; let expr1_0 = constructor_put_in_gpr(ctx, pattern5_0)?; let expr2_0 = constructor_cmove(ctx, pattern2_0, pattern3_0, &expr0_0, expr1_0)?; @@ -2525,7 +2509,7 @@ pub fn constructor_cmove_or( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1747. + // Rule at src/isa/x64/inst.isle line 1748. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::temp_writable_gpr(ctx); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -2567,7 +2551,7 @@ pub fn constructor_cmove_or_xmm( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1759. + // Rule at src/isa/x64/inst.isle line 1760. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::temp_writable_xmm(ctx); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -2610,7 +2594,7 @@ pub fn constructor_cmove_or_from_values( let pattern3_0 = arg2; let pattern4_0 = arg3; let pattern5_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1774. + // Rule at src/isa/x64/inst.isle line 1775. let expr0_0 = C::put_in_regs(ctx, pattern4_0); let expr1_0 = C::put_in_regs(ctx, pattern5_0); let expr2_0 = C::temp_writable_gpr(ctx); @@ -2682,7 +2666,7 @@ pub fn constructor_cmove_or_from_values( let pattern4_0 = arg2; let pattern5_0 = arg3; let pattern6_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1796. + // Rule at src/isa/x64/inst.isle line 1797. let expr0_0 = constructor_put_in_xmm_mem(ctx, pattern5_0)?; let expr1_0 = constructor_put_in_xmm(ctx, pattern6_0)?; let expr2_0 = constructor_cmove_or_xmm( @@ -2697,7 +2681,7 @@ pub fn constructor_cmove_or_from_values( let pattern4_0 = arg2; let pattern5_0 = arg3; let pattern6_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1793. + // Rule at src/isa/x64/inst.isle line 1794. let expr0_0 = constructor_put_in_gpr_mem(ctx, pattern5_0)?; let expr1_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr2_0 = @@ -2711,7 +2695,7 @@ pub fn constructor_cmove_or_from_values( // Generated as internal constructor for term x64_setcc. pub fn constructor_x64_setcc(ctx: &mut C, arg0: &CC) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1801. + // Rule at src/isa/x64/inst.isle line 1802. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::Setcc { cc: pattern0_0.clone(), @@ -2737,7 +2721,7 @@ pub fn constructor_xmm_rm_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1809. + // Rule at src/isa/x64/inst.isle line 1810. 
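    // dst is a fresh temp; the two-operand SSE form presumably ties it
    // to src1 with a reuse-def operand constraint under regalloc2,
    // instead of the old explicit mov.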
let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::XmmRmR { op: pattern1_0.clone(), @@ -2754,7 +2738,7 @@ pub fn constructor_xmm_rm_r( pub fn constructor_x64_paddb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1816. + // Rule at src/isa/x64/inst.isle line 1817. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2765,7 +2749,7 @@ pub fn constructor_x64_paddb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1821. + // Rule at src/isa/x64/inst.isle line 1822. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2776,7 +2760,7 @@ pub fn constructor_x64_paddw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1826. + // Rule at src/isa/x64/inst.isle line 1827. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Paddd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2787,7 +2771,7 @@ pub fn constructor_x64_paddd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1831. + // Rule at src/isa/x64/inst.isle line 1832. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Paddq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2798,7 +2782,7 @@ pub fn constructor_x64_paddq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1836. + // Rule at src/isa/x64/inst.isle line 1837. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2809,7 +2793,7 @@ pub fn constructor_x64_paddsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1841. + // Rule at src/isa/x64/inst.isle line 1842. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2820,7 +2804,7 @@ pub fn constructor_x64_paddsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1846. + // Rule at src/isa/x64/inst.isle line 1847. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddusb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2831,7 +2815,7 @@ pub fn constructor_x64_paddusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_paddusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1851. + // Rule at src/isa/x64/inst.isle line 1852. 
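    // PADDUSW: unsigned saturating add of 16-bit lanes.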
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddusw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2842,7 +2826,7 @@ pub fn constructor_x64_paddusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_psubb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1856. + // Rule at src/isa/x64/inst.isle line 1857. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2853,7 +2837,7 @@ pub fn constructor_x64_psubb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1861. + // Rule at src/isa/x64/inst.isle line 1862. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2864,7 +2848,7 @@ pub fn constructor_x64_psubw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1866. + // Rule at src/isa/x64/inst.isle line 1867. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Psubd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2875,7 +2859,7 @@ pub fn constructor_x64_psubd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1871. + // Rule at src/isa/x64/inst.isle line 1872. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Psubq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2886,7 +2870,7 @@ pub fn constructor_x64_psubq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1876. + // Rule at src/isa/x64/inst.isle line 1877. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2897,7 +2881,7 @@ pub fn constructor_x64_psubsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1881. + // Rule at src/isa/x64/inst.isle line 1882. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2908,7 +2892,7 @@ pub fn constructor_x64_psubsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1886. + // Rule at src/isa/x64/inst.isle line 1887. 
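    // PSUBUSB: unsigned saturating subtract of 8-bit lanes.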
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Psubusb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2919,7 +2903,7 @@ pub fn constructor_x64_psubusb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_psubusw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1891.
+    // Rule at src/isa/x64/inst.isle line 1892.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Psubusw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2930,7 +2914,7 @@ pub fn constructor_x64_psubusw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pavgb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1896.
+    // Rule at src/isa/x64/inst.isle line 1897.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pavgb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2941,7 +2925,7 @@ pub fn constructor_x64_pavgb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pavgw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1901.
+    // Rule at src/isa/x64/inst.isle line 1902.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pavgw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2952,7 +2936,7 @@ pub fn constructor_x64_pavgw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pand<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1906.
+    // Rule at src/isa/x64/inst.isle line 1907.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Pand;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2963,7 +2947,7 @@ pub fn constructor_x64_pand<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_andps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1911.
+    // Rule at src/isa/x64/inst.isle line 1912.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Andps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2974,7 +2958,7 @@ pub fn constructor_x64_andps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_andpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1916.
+    // Rule at src/isa/x64/inst.isle line 1917.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Andpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2985,7 +2969,7 @@ pub fn constructor_x64_andpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_por<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1921.
+    // Rule at src/isa/x64/inst.isle line 1922.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Por;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2996,7 +2980,7 @@ pub fn constructor_x64_por<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) ->
 pub fn constructor_x64_orps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1926.
+    // Rule at src/isa/x64/inst.isle line 1927.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Orps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3007,7 +2991,7 @@ pub fn constructor_x64_orps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_orpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1931.
+    // Rule at src/isa/x64/inst.isle line 1932.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Orpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3018,7 +3002,7 @@ pub fn constructor_x64_orpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_pxor<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1936.
+    // Rule at src/isa/x64/inst.isle line 1937.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pxor;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3029,7 +3013,7 @@ pub fn constructor_x64_pxor<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_xorps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1941.
+    // Rule at src/isa/x64/inst.isle line 1942.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Xorps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3040,7 +3024,7 @@ pub fn constructor_x64_xorps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_xorpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1946.
+    // Rule at src/isa/x64/inst.isle line 1947.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Xorpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3051,7 +3035,7 @@ pub fn constructor_x64_xorpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmullw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1951.
+    // Rule at src/isa/x64/inst.isle line 1952.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmullw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3062,7 +3046,7 @@ pub fn constructor_x64_pmullw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmulld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1956.
+    // Rule at src/isa/x64/inst.isle line 1957.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmulld;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3073,7 +3057,7 @@ pub fn constructor_x64_pmulld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmulhw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1961.
+    // Rule at src/isa/x64/inst.isle line 1962.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmulhw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3084,7 +3068,7 @@ pub fn constructor_x64_pmulhw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmulhuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1966.
+    // Rule at src/isa/x64/inst.isle line 1967.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmulhuw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3095,7 +3079,7 @@ pub fn constructor_x64_pmulhuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmuldq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1971.
+    // Rule at src/isa/x64/inst.isle line 1972.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmuldq;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3106,7 +3090,7 @@ pub fn constructor_x64_pmuldq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmuludq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1976.
+    // Rule at src/isa/x64/inst.isle line 1977.
     let expr0_0: Type = I64X2;
     let expr1_0 = SseOpcode::Pmuludq;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3117,7 +3101,7 @@ pub fn constructor_x64_pmuludq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_punpckhwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1981.
+    // Rule at src/isa/x64/inst.isle line 1982.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Punpckhwd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3128,7 +3112,7 @@ pub fn constructor_x64_punpckhwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_punpcklwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1986.
+    // Rule at src/isa/x64/inst.isle line 1987.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Punpcklwd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3139,7 +3123,7 @@ pub fn constructor_x64_punpcklwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_andnps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1991.
+    // Rule at src/isa/x64/inst.isle line 1992.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Andnps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3150,7 +3134,7 @@ pub fn constructor_x64_andnps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_andnpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1996.
+    // Rule at src/isa/x64/inst.isle line 1997.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Andnpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3161,7 +3145,7 @@ pub fn constructor_x64_andnpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pandn<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2001.
+    // Rule at src/isa/x64/inst.isle line 2002.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Pandn;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3172,7 +3156,7 @@ pub fn constructor_x64_pandn<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2006.
+    // Rule at src/isa/x64/inst.isle line 2007.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Addss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3183,7 +3167,7 @@ pub fn constructor_x64_addss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2011.
+    // Rule at src/isa/x64/inst.isle line 2012.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Addsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3194,7 +3178,7 @@ pub fn constructor_x64_addsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2016.
+    // Rule at src/isa/x64/inst.isle line 2017.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Addps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3205,7 +3189,7 @@ pub fn constructor_x64_addps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2021.
+    // Rule at src/isa/x64/inst.isle line 2022.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Addpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3216,7 +3200,7 @@ pub fn constructor_x64_addpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2026.
+    // Rule at src/isa/x64/inst.isle line 2027.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Subss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3227,7 +3211,7 @@ pub fn constructor_x64_subss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2031.
+    // Rule at src/isa/x64/inst.isle line 2032.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Subsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3238,7 +3222,7 @@ pub fn constructor_x64_subsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2036.
+    // Rule at src/isa/x64/inst.isle line 2037.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Subps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3249,7 +3233,7 @@ pub fn constructor_x64_subps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2041.
+    // Rule at src/isa/x64/inst.isle line 2042.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Subpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3260,7 +3244,7 @@ pub fn constructor_x64_subpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2046.
+    // Rule at src/isa/x64/inst.isle line 2047.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Mulss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3271,7 +3255,7 @@ pub fn constructor_x64_mulss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2051.
+    // Rule at src/isa/x64/inst.isle line 2052.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Mulsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3282,7 +3266,7 @@ pub fn constructor_x64_mulsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2056.
+    // Rule at src/isa/x64/inst.isle line 2057.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Mulps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3293,7 +3277,7 @@ pub fn constructor_x64_mulps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2061.
+    // Rule at src/isa/x64/inst.isle line 2062.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Mulpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3304,7 +3288,7 @@ pub fn constructor_x64_mulpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2066.
+    // Rule at src/isa/x64/inst.isle line 2067.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Divss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3315,7 +3299,7 @@ pub fn constructor_x64_divss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2071.
+    // Rule at src/isa/x64/inst.isle line 2072.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Divsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3326,7 +3310,7 @@ pub fn constructor_x64_divsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2076.
+    // Rule at src/isa/x64/inst.isle line 2077.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Divps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3337,7 +3321,7 @@ pub fn constructor_x64_divps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2081.
+    // Rule at src/isa/x64/inst.isle line 2082.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Divpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3348,17 +3332,17 @@ pub fn constructor_x64_divpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_sse_blend_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<SseOpcode> {
     let pattern0_0 = arg0;
     if pattern0_0 == F32X4 {
-        // Rule at src/isa/x64/inst.isle line 2085.
+        // Rule at src/isa/x64/inst.isle line 2086.
         let expr0_0 = SseOpcode::Blendvps;
         return Some(expr0_0);
     }
     if pattern0_0 == F64X2 {
-        // Rule at src/isa/x64/inst.isle line 2086.
+        // Rule at src/isa/x64/inst.isle line 2087.
         let expr0_0 = SseOpcode::Blendvpd;
         return Some(expr0_0);
     }
     if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) {
-        // Rule at src/isa/x64/inst.isle line 2087.
+        // Rule at src/isa/x64/inst.isle line 2088.
         let expr0_0 = SseOpcode::Pblendvb;
         return Some(expr0_0);
     }
@@ -3369,17 +3353,17 @@ pub fn constructor_sse_blend_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<
 pub fn constructor_sse_mov_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<SseOpcode> {
     let pattern0_0 = arg0;
     if pattern0_0 == F32X4 {
-        // Rule at src/isa/x64/inst.isle line 2090.
+        // Rule at src/isa/x64/inst.isle line 2091.
         let expr0_0 = SseOpcode::Movaps;
         return Some(expr0_0);
     }
     if pattern0_0 == F64X2 {
-        // Rule at src/isa/x64/inst.isle line 2091.
+        // Rule at src/isa/x64/inst.isle line 2092.
        let expr0_0 = SseOpcode::Movapd;
         return Some(expr0_0);
     }
     if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) {
-        // Rule at src/isa/x64/inst.isle line 2092.
+        // Rule at src/isa/x64/inst.isle line 2093.
         let expr0_0 = SseOpcode::Movdqa;
         return Some(expr0_0);
     }
@@ -3398,7 +3382,7 @@ pub fn constructor_x64_blend<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2096.
+    // Rule at src/isa/x64/inst.isle line 2097.
     let expr0_0 = C::xmm0(ctx);
     let expr1_0 = constructor_sse_mov_op(ctx, pattern0_0)?;
     let expr2_0 = MInst::XmmUnaryRmR {
@@ -3422,7 +3406,7 @@ pub fn constructor_x64_blendvpd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2110.
+    // Rule at src/isa/x64/inst.isle line 2111.
     let expr0_0 = C::xmm0(ctx);
     let expr1_0 = SseOpcode::Movapd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern2_0);
@@ -3446,7 +3430,7 @@ pub fn constructor_x64_movsd_regmove<C: Context>(
 ) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2124.
+    // Rule at src/isa/x64/inst.isle line 2125.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Movsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3457,7 +3441,7 @@ pub fn constructor_x64_movsd_regmove<C: Context>(
 pub fn constructor_x64_movlhps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2129.
+    // Rule at src/isa/x64/inst.isle line 2130.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Movlhps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3475,21 +3459,21 @@ pub fn constructor_x64_pmaxs<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2134.
+        // Rule at src/isa/x64/inst.isle line 2135.
         let expr0_0 = constructor_x64_pmaxsb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2135.
+        // Rule at src/isa/x64/inst.isle line 2136.
         let expr0_0 = constructor_x64_pmaxsw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2136.
+        // Rule at src/isa/x64/inst.isle line 2137.
         let expr0_0 = constructor_x64_pmaxsd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3500,7 +3484,7 @@ pub fn constructor_x64_pmaxs<C: Context>(
 pub fn constructor_x64_pmaxsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2139.
+    // Rule at src/isa/x64/inst.isle line 2140.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxsb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3511,7 +3495,7 @@ pub fn constructor_x64_pmaxsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2141.
+    // Rule at src/isa/x64/inst.isle line 2142.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxsw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3522,7 +3506,7 @@ pub fn constructor_x64_pmaxsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2143.
+    // Rule at src/isa/x64/inst.isle line 2144.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3540,21 +3524,21 @@ pub fn constructor_x64_pmins<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2147.
+        // Rule at src/isa/x64/inst.isle line 2148.
         let expr0_0 = constructor_x64_pminsb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2148.
+        // Rule at src/isa/x64/inst.isle line 2149.
         let expr0_0 = constructor_x64_pminsw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2149.
+        // Rule at src/isa/x64/inst.isle line 2150.
         let expr0_0 = constructor_x64_pminsd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3565,7 +3549,7 @@ pub fn constructor_x64_pmins<C: Context>(
 pub fn constructor_x64_pminsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2152.
+    // Rule at src/isa/x64/inst.isle line 2153.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminsb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3576,7 +3560,7 @@ pub fn constructor_x64_pminsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2154.
+    // Rule at src/isa/x64/inst.isle line 2155.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pminsw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3587,7 +3571,7 @@ pub fn constructor_x64_pminsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2156.
+    // Rule at src/isa/x64/inst.isle line 2157.
     let expr0_0: Type = I32X4;
     let expr1_0 = SseOpcode::Pminsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3605,21 +3589,21 @@ pub fn constructor_x64_pmaxu<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2160.
+        // Rule at src/isa/x64/inst.isle line 2161.
         let expr0_0 = constructor_x64_pmaxub(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2161.
+        // Rule at src/isa/x64/inst.isle line 2162.
         let expr0_0 = constructor_x64_pmaxuw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2162.
+        // Rule at src/isa/x64/inst.isle line 2163.
         let expr0_0 = constructor_x64_pmaxud(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3630,7 +3614,7 @@ pub fn constructor_x64_pmaxu<C: Context>(
 pub fn constructor_x64_pmaxub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2165.
+    // Rule at src/isa/x64/inst.isle line 2166.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxub;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3641,7 +3625,7 @@ pub fn constructor_x64_pmaxub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2167.
+    // Rule at src/isa/x64/inst.isle line 2168.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxuw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3652,7 +3636,7 @@ pub fn constructor_x64_pmaxuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxud<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2169.
+    // Rule at src/isa/x64/inst.isle line 2170.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxud;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3670,21 +3654,21 @@ pub fn constructor_x64_pminu<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2173.
+        // Rule at src/isa/x64/inst.isle line 2174.
         let expr0_0 = constructor_x64_pminub(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2174.
+        // Rule at src/isa/x64/inst.isle line 2175.
         let expr0_0 = constructor_x64_pminuw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2175.
+        // Rule at src/isa/x64/inst.isle line 2176.
         let expr0_0 = constructor_x64_pminud(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3695,7 +3679,7 @@ pub fn constructor_x64_pminu<C: Context>(
 pub fn constructor_x64_pminub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2178.
+    // Rule at src/isa/x64/inst.isle line 2179.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminub;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3706,7 +3690,7 @@ pub fn constructor_x64_pminub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2180.
+    // Rule at src/isa/x64/inst.isle line 2181.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminuw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3717,7 +3701,7 @@ pub fn constructor_x64_pminuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminud<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2182.
+    // Rule at src/isa/x64/inst.isle line 2183.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminud;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3728,7 +3712,7 @@ pub fn constructor_x64_pminud<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_punpcklbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2186.
+    // Rule at src/isa/x64/inst.isle line 2187.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Punpcklbw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3739,7 +3723,7 @@ pub fn constructor_x64_punpcklbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_punpckhbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2191.
+    // Rule at src/isa/x64/inst.isle line 2192.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Punpckhbw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3750,7 +3734,7 @@ pub fn constructor_x64_punpckhbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_packsswb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2196.
+    // Rule at src/isa/x64/inst.isle line 2197.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Packsswb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3771,7 +3755,7 @@ pub fn constructor_xmm_rm_r_imm<C: Context>(
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
     let pattern4_0 = arg4;
-    // Rule at src/isa/x64/inst.isle line 2201.
+    // Rule at src/isa/x64/inst.isle line 2202.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0);
     let expr2_0 = MInst::XmmRmRImm {
@@ -3799,7 +3783,7 @@ pub fn constructor_x64_palignr<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2213.
+    // Rule at src/isa/x64/inst.isle line 2214.
     let expr0_0 = SseOpcode::Palignr;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3821,7 +3805,7 @@ pub fn constructor_x64_cmpp<C: Context>(
     let pattern2_0 = arg1;
     let pattern3_0 = arg2;
     let pattern4_0 = arg3;
-        // Rule at src/isa/x64/inst.isle line 2222.
+        // Rule at src/isa/x64/inst.isle line 2223.
     let expr0_0 = constructor_x64_cmpps(ctx, pattern2_0, pattern3_0, pattern4_0)?;
     return Some(expr0_0);
     }
@@ -3829,7 +3813,7 @@ pub fn constructor_x64_cmpp<C: Context>(
     let pattern2_0 = arg1;
     let pattern3_0 = arg2;
     let pattern4_0 = arg3;
-        // Rule at src/isa/x64/inst.isle line 2223.
+        // Rule at src/isa/x64/inst.isle line 2224.
     let expr0_0 = constructor_x64_cmppd(ctx, pattern2_0, pattern3_0, pattern4_0)?;
     return Some(expr0_0);
     }
@@ -3846,7 +3830,7 @@ pub fn constructor_x64_cmpps<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2226.
+    // Rule at src/isa/x64/inst.isle line 2227.
     let expr0_0 = SseOpcode::Cmpps;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3866,7 +3850,7 @@ pub fn constructor_x64_cmppd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2237.
+    // Rule at src/isa/x64/inst.isle line 2238.
     let expr0_0 = SseOpcode::Cmppd;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3886,7 +3870,7 @@ pub fn constructor_x64_pinsrb<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2246.
+    // Rule at src/isa/x64/inst.isle line 2247.
     let expr0_0 = SseOpcode::Pinsrb;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0);
@@ -3905,7 +3889,7 @@ pub fn constructor_x64_pinsrw<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2255.
+    // Rule at src/isa/x64/inst.isle line 2256.
     let expr0_0 = SseOpcode::Pinsrw;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0);
@@ -3926,7 +3910,7 @@ pub fn constructor_x64_pinsrd<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2264.
+    // Rule at src/isa/x64/inst.isle line 2265.
     let expr0_0 = SseOpcode::Pinsrd;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0);
@@ -3939,7 +3923,7 @@ pub fn constructor_x64_pinsrd<C: Context>(
 pub fn constructor_x64_pmaddwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2273.
+    // Rule at src/isa/x64/inst.isle line 2274.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Pmaddwd;
     let expr2_0 = MInst::XmmRmR {
@@ -3963,7 +3947,7 @@ pub fn constructor_x64_insertps<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2283.
+    // Rule at src/isa/x64/inst.isle line 2284.
     let expr0_0 = SseOpcode::Insertps;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3982,7 +3966,7 @@ pub fn constructor_x64_pshufd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2292.
+    // Rule at src/isa/x64/inst.isle line 2293.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Pshufd;
     let expr2_0 = constructor_writable_xmm_to_r_reg(ctx, expr0_0)?;
@@ -4005,7 +3989,7 @@ pub fn constructor_x64_pshufd<C: Context>(
 pub fn constructor_x64_pshufb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2304.
+    // Rule at src/isa/x64/inst.isle line 2305.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Pshufb;
     let expr2_0 = MInst::XmmRmR {
@@ -4027,7 +4011,7 @@ pub fn constructor_xmm_unary_rm_r<C: Context>(
 ) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2314.
+    // Rule at src/isa/x64/inst.isle line 2315.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmUnaryRmR {
         op: pattern0_0.clone(),
@@ -4042,7 +4026,7 @@ pub fn constructor_xmm_unary_rm_r<C: Context>(
 // Generated as internal constructor for term x64_pabsb.
 pub fn constructor_x64_pabsb<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2321.
+    // Rule at src/isa/x64/inst.isle line 2322.
     let expr0_0 = SseOpcode::Pabsb;
     let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4051,7 +4035,7 @@ pub fn constructor_x64_pabsb<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<
 // Generated as internal constructor for term x64_pabsw.
 pub fn constructor_x64_pabsw<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2326.
+    // Rule at src/isa/x64/inst.isle line 2327.
     let expr0_0 = SseOpcode::Pabsw;
     let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4060,7 +4044,7 @@ pub fn constructor_x64_pabsw<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<
 // Generated as internal constructor for term x64_pabsd.
 pub fn constructor_x64_pabsd<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2331.
+    // Rule at src/isa/x64/inst.isle line 2332.
     let expr0_0 = SseOpcode::Pabsd;
     let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4074,7 +4058,7 @@ pub fn constructor_xmm_unary_rm_r_evex<C: Context>(
 ) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2336.
+    // Rule at src/isa/x64/inst.isle line 2337.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmUnaryRmREvex {
         op: pattern0_0.clone(),
@@ -4089,7 +4073,7 @@ pub fn constructor_xmm_unary_rm_r_evex<C: Context>(
 // Generated as internal constructor for term x64_vpabsq.
 pub fn constructor_x64_vpabsq<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2343.
+    // Rule at src/isa/x64/inst.isle line 2344.
     let expr0_0 = Avx512Opcode::Vpabsq;
     let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4098,7 +4082,7 @@ pub fn constructor_x64_vpabsq<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<
 // Generated as internal constructor for term x64_vpopcntb.
 pub fn constructor_x64_vpopcntb<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2348.
+    // Rule at src/isa/x64/inst.isle line 2349.
     let expr0_0 = Avx512Opcode::Vpopcntb;
     let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4114,7 +4098,7 @@ pub fn constructor_xmm_rm_r_evex<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2353.
+    // Rule at src/isa/x64/inst.isle line 2354.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmRmREvex {
         op: pattern0_0.clone(),
@@ -4131,7 +4115,7 @@ pub fn constructor_xmm_rm_r_evex<C: Context>(
 pub fn constructor_x64_vpmullq<C: Context>(ctx: &mut C, arg0: &XmmMem, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2365.
+    // Rule at src/isa/x64/inst.isle line 2366.
     let expr0_0 = Avx512Opcode::Vpmullq;
     let expr1_0 = constructor_xmm_rm_r_evex(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4149,7 +4133,7 @@ pub fn constructor_mul_hi<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2374.
+    // Rule at src/isa/x64/inst.isle line 2375.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::temp_writable_gpr(ctx);
     let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
@@ -4178,7 +4162,7 @@ pub fn constructor_mulhi_u<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2389.
+    // Rule at src/isa/x64/inst.isle line 2390.
     let expr0_0: bool = false;
     let expr1_0 = constructor_mul_hi(ctx, pattern0_0, expr0_0, pattern1_0, pattern2_0)?;
     return Some(expr1_0);
@@ -4194,7 +4178,7 @@ pub fn constructor_xmm_rmi_xmm<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2394.
+    // Rule at src/isa/x64/inst.isle line 2395.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmRmiReg {
         opcode: pattern0_0.clone(),
@@ -4211,7 +4195,7 @@ pub fn constructor_xmm_rmi_xmm<C: Context>(
 pub fn constructor_x64_psllw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2404.
+    // Rule at src/isa/x64/inst.isle line 2405.
     let expr0_0 = SseOpcode::Psllw;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4221,7 +4205,7 @@ pub fn constructor_x64_psllw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_pslld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2409.
+    // Rule at src/isa/x64/inst.isle line 2410.
     let expr0_0 = SseOpcode::Pslld;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4231,7 +4215,7 @@ pub fn constructor_x64_pslld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psllq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2414.
+    // Rule at src/isa/x64/inst.isle line 2415.
     let expr0_0 = SseOpcode::Psllq;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4241,7 +4225,7 @@ pub fn constructor_x64_psllq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrlw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2419.
+    // Rule at src/isa/x64/inst.isle line 2420.
     let expr0_0 = SseOpcode::Psrlw;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4251,7 +4235,7 @@ pub fn constructor_x64_psrlw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2424.
+    // Rule at src/isa/x64/inst.isle line 2425.
     let expr0_0 = SseOpcode::Psrld;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4261,7 +4245,7 @@ pub fn constructor_x64_psrld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrlq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2429.
+    // Rule at src/isa/x64/inst.isle line 2430.
     let expr0_0 = SseOpcode::Psrlq;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4271,7 +4255,7 @@ pub fn constructor_x64_psrlq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psraw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2434.
+    // Rule at src/isa/x64/inst.isle line 2435.
     let expr0_0 = SseOpcode::Psraw;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4281,7 +4265,7 @@ pub fn constructor_x64_psraw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrad<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2439.
+    // Rule at src/isa/x64/inst.isle line 2440.
     let expr0_0 = SseOpcode::Psrad;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4297,7 +4281,7 @@ pub fn constructor_x64_pextrd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2444.
+    // Rule at src/isa/x64/inst.isle line 2445.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = SseOpcode::Pextrd;
     let expr2_0 = constructor_writable_gpr_to_r_reg(ctx, expr0_0)?;
@@ -4330,7 +4314,7 @@ pub fn constructor_gpr_to_xmm<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2456.
+    // Rule at src/isa/x64/inst.isle line 2457.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::GprToXmm {
         op: pattern0_0.clone(),
@@ -4347,7 +4331,7 @@ pub fn constructor_gpr_to_xmm<C: Context>(
 pub fn constructor_x64_not<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2463.
+    // Rule at src/isa/x64/inst.isle line 2464.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = MInst::Not {
@@ -4364,7 +4348,7 @@ pub fn constructor_x64_not<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Op
 pub fn constructor_x64_neg<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2471.
+    // Rule at src/isa/x64/inst.isle line 2472.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = MInst::Neg {
@@ -4380,7 +4364,7 @@ pub fn constructor_x64_neg<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Op
 // Generated as internal constructor for term x64_lea.
 pub fn constructor_x64_lea<C: Context>(ctx: &mut C, arg0: &SyntheticAmode) -> Option<Gpr> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2478.
+    // Rule at src/isa/x64/inst.isle line 2479.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = MInst::LoadEffectiveAddress {
         addr: pattern0_0.clone(),
@@ -4394,7 +4378,7 @@ pub fn constructor_x64_lea<C: Context>(ctx: &mut C, arg0: &SyntheticAmode) -> Op
 // Generated as internal constructor for term x64_ud2.
 pub fn constructor_x64_ud2<C: Context>(ctx: &mut C, arg0: &TrapCode) -> Option<SideEffectNoResult> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2485.
+    // Rule at src/isa/x64/inst.isle line 2486.
     let expr0_0 = MInst::Ud2 {
         trap_code: pattern0_0.clone(),
     };
@@ -4404,7 +4388,7 @@ pub fn constructor_x64_ud2<C: Context>(ctx: &mut C, arg0: &TrapCode) -> Option<
 
 // Generated as internal constructor for term x64_hlt.
 pub fn constructor_x64_hlt<C: Context>(ctx: &mut C) -> Option<SideEffectNoResult> {
-    // Rule at src/isa/x64/inst.isle line 2490.
+    // Rule at src/isa/x64/inst.isle line 2491.
     let expr0_0 = MInst::Hlt;
     let expr1_0 = SideEffectNoResult::Inst { inst: expr0_0 };
     return Some(expr1_0);
@@ -4414,7 +4398,7 @@ pub fn constructor_x64_hlt<C: Context>(ctx: &mut C) -> Option<SideEffectNoResu
 pub fn constructor_x64_lzcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2495.
+    // Rule at src/isa/x64/inst.isle line 2496.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Lzcnt;
@@ -4434,7 +4418,7 @@ pub fn constructor_x64_lzcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) ->
 pub fn constructor_x64_tzcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2503.
+    // Rule at src/isa/x64/inst.isle line 2504.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Tzcnt;
@@ -4458,7 +4442,7 @@ pub fn constructor_x64_bsr<C: Context>(
 ) -> Option<ProducesFlags> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2511.
+    // Rule at src/isa/x64/inst.isle line 2512.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Bsr;
@@ -4487,7 +4471,7 @@ pub fn constructor_bsr_or_else<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2520.
+    // Rule at src/isa/x64/inst.isle line 2521.
     let expr0_0 = constructor_x64_bsr(ctx, pattern0_0, pattern1_0)?;
     let expr1_0 = constructor_produces_flags_get_reg(ctx, &expr0_0)?;
     let expr2_0 = C::gpr_new(ctx, expr1_0);
@@ -4508,7 +4492,7 @@ pub fn constructor_x64_bsf<C: Context>(
 ) -> Option<ProducesFlags> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2531.
+    // Rule at src/isa/x64/inst.isle line 2532.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Bsf;
@@ -4537,7 +4521,7 @@ pub fn constructor_bsf_or_else<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2540.
+    // Rule at src/isa/x64/inst.isle line 2541.
     let expr0_0 = constructor_x64_bsf(ctx, pattern0_0, pattern1_0)?;
     let expr1_0 = constructor_produces_flags_get_reg(ctx, &expr0_0)?;
     let expr2_0 = C::gpr_new(ctx, expr1_0);
@@ -4554,7 +4538,7 @@ pub fn constructor_bsf_or_else<C: Context>(
 pub fn constructor_x64_popcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2551.
+    // Rule at src/isa/x64/inst.isle line 2552.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Popcnt;
@@ -4582,7 +4566,7 @@ pub fn constructor_xmm_min_max_seq<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2559.
+    // Rule at src/isa/x64/inst.isle line 2560.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = MInst::XmmMinMaxSeq {
@@ -4601,7 +4585,7 @@ pub fn constructor_xmm_min_max_seq<C: Context>(
 pub fn constructor_x64_minss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2567.
+    // Rule at src/isa/x64/inst.isle line 2568.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minss;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4620,7 +4604,7 @@ pub fn constructor_x64_minss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_minsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2574.
+    // Rule at src/isa/x64/inst.isle line 2575.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minsd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4639,7 +4623,7 @@ pub fn constructor_x64_minsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_minps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2582.
+    // Rule at src/isa/x64/inst.isle line 2583.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minps;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4658,7 +4642,7 @@ pub fn constructor_x64_minps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_minpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2589.
+    // Rule at src/isa/x64/inst.isle line 2590.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minpd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4677,7 +4661,7 @@ pub fn constructor_x64_minpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2596.
+    // Rule at src/isa/x64/inst.isle line 2597.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxss;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4696,7 +4680,7 @@ pub fn constructor_x64_maxss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2603.
+    // Rule at src/isa/x64/inst.isle line 2604.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxsd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4715,7 +4699,7 @@ pub fn constructor_x64_maxsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2610.
+    // Rule at src/isa/x64/inst.isle line 2611.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxps;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4734,7 +4718,7 @@ pub fn constructor_x64_maxps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2617.
+    // Rule at src/isa/x64/inst.isle line 2618.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxpd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4760,28 +4744,28 @@ pub fn constructor_x64_pcmpeq<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2624.
+        // Rule at src/isa/x64/inst.isle line 2625.
         let expr0_0 = constructor_x64_pcmpeqb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2625.
+        // Rule at src/isa/x64/inst.isle line 2626.
         let expr0_0 = constructor_x64_pcmpeqw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2626.
+        // Rule at src/isa/x64/inst.isle line 2627.
         let expr0_0 = constructor_x64_pcmpeqd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I64X2 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2627.
+        // Rule at src/isa/x64/inst.isle line 2628.
         let expr0_0 = constructor_x64_pcmpeqq(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -4792,7 +4776,7 @@ pub fn constructor_x64_pcmpeq<C: Context>(
 pub fn constructor_x64_pcmpeqb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2630.
+    // Rule at src/isa/x64/inst.isle line 2631.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pcmpeqb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4803,7 +4787,7 @@ pub fn constructor_x64_pcmpeqb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpeqw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2632.
+    // Rule at src/isa/x64/inst.isle line 2633.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pcmpeqw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4814,7 +4798,7 @@ pub fn constructor_x64_pcmpeqw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpeqd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2634.
+    // Rule at src/isa/x64/inst.isle line 2635.
     let expr0_0: Type = I32X4;
     let expr1_0 = SseOpcode::Pcmpeqd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4825,7 +4809,7 @@ pub fn constructor_x64_pcmpeqd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpeqq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2636.
+    // Rule at src/isa/x64/inst.isle line 2637.
     let expr0_0: Type = I64X2;
     let expr1_0 = SseOpcode::Pcmpeqq;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4843,28 +4827,28 @@ pub fn constructor_x64_pcmpgt<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2640.
+        // Rule at src/isa/x64/inst.isle line 2641.
         let expr0_0 = constructor_x64_pcmpgtb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2641.
+        // Rule at src/isa/x64/inst.isle line 2642.
         let expr0_0 = constructor_x64_pcmpgtw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2642.
+        // Rule at src/isa/x64/inst.isle line 2643.
        let expr0_0 = constructor_x64_pcmpgtd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I64X2 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2643.
+        // Rule at src/isa/x64/inst.isle line 2644.
         let expr0_0 = constructor_x64_pcmpgtq(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -4875,7 +4859,7 @@ pub fn constructor_x64_pcmpgt<C: Context>(
 pub fn constructor_x64_pcmpgtb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2646.
+    // Rule at src/isa/x64/inst.isle line 2647.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pcmpgtb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4886,7 +4870,7 @@ pub fn constructor_x64_pcmpgtb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpgtw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2648.
+    // Rule at src/isa/x64/inst.isle line 2649.
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pcmpgtw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -4897,7 +4881,7 @@ pub fn constructor_x64_pcmpgtw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_pcmpgtd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2650. + // Rule at src/isa/x64/inst.isle line 2651. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Pcmpgtd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -4908,7 +4892,7 @@ pub fn constructor_x64_pcmpgtd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_pcmpgtq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2652. + // Rule at src/isa/x64/inst.isle line 2653. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Pcmpgtq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -4918,7 +4902,7 @@ pub fn constructor_x64_pcmpgtq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem // Generated as internal constructor for term reg_to_xmm_mem. pub fn constructor_reg_to_xmm_mem(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2708. + // Rule at src/isa/x64/inst.isle line 2709. let expr0_0 = C::xmm_new(ctx, pattern0_0); let expr1_0 = C::xmm_to_xmm_mem(ctx, expr0_0); return Some(expr1_0); @@ -4927,7 +4911,7 @@ pub fn constructor_reg_to_xmm_mem(ctx: &mut C, arg0: Reg) -> Option< // Generated as internal constructor for term xmm_to_reg_mem. pub fn constructor_xmm_to_reg_mem(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2711. + // Rule at src/isa/x64/inst.isle line 2712. let expr0_0 = C::xmm_new(ctx, pattern0_0); let expr1_0 = C::xmm_to_reg(ctx, expr0_0); let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -4941,7 +4925,7 @@ pub fn constructor_writable_gpr_to_r_reg( arg0: WritableGpr, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2715. + // Rule at src/isa/x64/inst.isle line 2716. let expr0_0 = C::writable_gpr_to_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); @@ -4953,7 +4937,7 @@ pub fn constructor_writable_gpr_to_gpr_mem( arg0: WritableGpr, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2718. + // Rule at src/isa/x64/inst.isle line 2719. let expr0_0 = C::writable_gpr_to_gpr(ctx, pattern0_0); let expr1_0 = C::gpr_to_gpr_mem(ctx, expr0_0); return Some(expr1_0); @@ -4965,7 +4949,7 @@ pub fn constructor_writable_gpr_to_value_regs( arg0: WritableGpr, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2721. + // Rule at src/isa/x64/inst.isle line 2722. let expr0_0 = constructor_writable_gpr_to_r_reg(ctx, pattern0_0)?; let expr1_0 = C::value_reg(ctx, expr0_0); return Some(expr1_0); @@ -4977,7 +4961,7 @@ pub fn constructor_writable_xmm_to_r_reg( arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2724. + // Rule at src/isa/x64/inst.isle line 2725. let expr0_0 = C::writable_xmm_to_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); @@ -4989,7 +4973,7 @@ pub fn constructor_writable_xmm_to_xmm_mem( arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2727. 
+ // Rule at src/isa/x64/inst.isle line 2728. let expr0_0 = C::writable_xmm_to_xmm(ctx, pattern0_0); let expr1_0 = C::xmm_to_xmm_mem(ctx, expr0_0); return Some(expr1_0); @@ -5001,7 +4985,7 @@ pub fn constructor_writable_xmm_to_value_regs( arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2730. + // Rule at src/isa/x64/inst.isle line 2731. let expr0_0 = constructor_writable_xmm_to_r_reg(ctx, pattern0_0)?; let expr1_0 = C::value_reg(ctx, expr0_0); return Some(expr1_0); @@ -5013,7 +4997,7 @@ pub fn constructor_synthetic_amode_to_gpr_mem( arg0: &SyntheticAmode, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2737. + // Rule at src/isa/x64/inst.isle line 2738. let expr0_0 = C::synthetic_amode_to_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_gpr_mem(ctx, &expr0_0); return Some(expr1_0); @@ -5022,7 +5006,7 @@ pub fn constructor_synthetic_amode_to_gpr_mem( // Generated as internal constructor for term amode_to_gpr_mem. pub fn constructor_amode_to_gpr_mem(ctx: &mut C, arg0: &Amode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2735. + // Rule at src/isa/x64/inst.isle line 2736. let expr0_0 = C::amode_to_synthetic_amode(ctx, pattern0_0); let expr1_0 = constructor_synthetic_amode_to_gpr_mem(ctx, &expr0_0)?; return Some(expr1_0); @@ -5031,7 +5015,7 @@ pub fn constructor_amode_to_gpr_mem(ctx: &mut C, arg0: &Amode) -> Op // Generated as internal constructor for term amode_to_xmm_mem. pub fn constructor_amode_to_xmm_mem(ctx: &mut C, arg0: &Amode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2740. + // Rule at src/isa/x64/inst.isle line 2741. let expr0_0 = C::amode_to_synthetic_amode(ctx, pattern0_0); let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, &expr0_0)?; return Some(expr1_0); @@ -5043,7 +5027,7 @@ pub fn constructor_synthetic_amode_to_xmm_mem( arg0: &SyntheticAmode, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2743. + // Rule at src/isa/x64/inst.isle line 2744. let expr0_0 = C::synthetic_amode_to_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_xmm_mem(ctx, &expr0_0); return Some(expr1_0); @@ -5228,13 +5212,13 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { // Rule at src/isa/x64/lower.isle line 1434. let expr0_0 = constructor_x64_ud2(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } &Opcode::ResumableTrap => { // Rule at src/isa/x64/lower.isle line 1439. 
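// `trap` and `resumable_trap` previously lowered through a dedicated
// `safepoint` constructor so the old register allocator could be told where
// a stack map was needed. regalloc2 tracks reference-typed vregs itself (see
// the removal of `InstIsSafepoint` and `emit_safepoint` later in this diff),
// so the `ud2` is now wrapped in the ordinary `side_effect` constructor.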
let expr0_0 = constructor_x64_ud2(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } _ => {} diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index a86ffc34fb..6e8256f075 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -6,8 +6,9 @@ use super::TargetIsa; use crate::ir::{condcodes::IntCC, Function}; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv; -use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings}; +use crate::isa::x64::{inst::regs::create_reg_env_systemv, settings as x64_settings}; use crate::isa::Builder as IsaBuilder; +use crate::machinst::Reg; use crate::machinst::{ compile, MachCompileResult, MachTextSectionBuilder, TextSectionBuilder, VCode, }; @@ -15,8 +16,7 @@ use crate::result::{CodegenError, CodegenResult}; use crate::settings::{self as shared_settings, Flags}; use alloc::{boxed::Box, vec::Vec}; use core::fmt; - -use regalloc::{PrettyPrint, RealRegUniverse, Reg}; +use regalloc2::MachineEnv; use target_lexicon::Triple; mod abi; @@ -30,27 +30,31 @@ pub(crate) struct X64Backend { triple: Triple, flags: Flags, x64_flags: x64_settings::Flags, - reg_universe: RealRegUniverse, + reg_env: MachineEnv, } impl X64Backend { /// Create a new X64 backend with the given (shared) flags. fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self { - let reg_universe = create_reg_universe_systemv(&flags); + let reg_env = create_reg_env_systemv(&flags); Self { triple, flags, x64_flags, - reg_universe, + reg_env, } } - fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult> { + fn compile_vcode( + &self, + func: &Function, + flags: Flags, + ) -> CodegenResult<(VCode, regalloc2::Output)> { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. 
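// A sketch of the resulting two-phase flow, using the signatures that appear
// in this diff (register allocation no longer rewrites the instruction
// stream in place; `emit` consults the `regalloc2::Output` while producing
// machine code):
//
//     let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?;
//     let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info());
//     let code = emit_result.buffer.finish();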
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone()); let abi = Box::new(abi::X64ABICallee::new(&func, flags, self.isa_flags())?); - compile::compile::(&func, self, abi, &self.reg_universe, emit_info) + compile::compile::(&func, self, abi, &self.reg_env, emit_info) } } @@ -61,28 +65,27 @@ impl TargetIsa for X64Backend { want_disasm: bool, ) -> CodegenResult { let flags = self.flags(); - let vcode = self.compile_vcode(func, flags.clone())?; + let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; - let (buffer, bb_starts, bb_edges) = vcode.emit(); - let buffer = buffer.finish(); - let frame_size = vcode.frame_size(); - let value_labels_ranges = vcode.value_labels_ranges(); - let stackslot_offsets = vcode.stackslot_offsets().clone(); + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, flags.machine_code_cfg_info()); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer.finish(); + let stackslot_offsets = emit_result.stackslot_offsets; - let disasm = if want_disasm { - Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags)))) - } else { - None - }; + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } Ok(MachCompileResult { buffer, frame_size, - disasm, + disasm: emit_result.disasm, value_labels_ranges, stackslot_offsets, - bb_starts, - bb_edges, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, }) } @@ -319,30 +322,29 @@ mod test { // 00000000 55 push rbp // 00000001 4889E5 mov rbp,rsp - // 00000004 4889FE mov rsi,rdi - // 00000007 81C634120000 add esi,0x1234 - // 0000000D 85F6 test esi,esi - // 0000000F 0F841B000000 jz near 0x30 - // 00000015 4889F7 mov rdi,rsi - // 00000018 4889F0 mov rax,rsi - // 0000001B 81E834120000 sub eax,0x1234 - // 00000021 01F8 add eax,edi - // 00000023 85F6 test esi,esi - // 00000025 0F8505000000 jnz near 0x30 - // 0000002B 4889EC mov rsp,rbp - // 0000002E 5D pop rbp - // 0000002F C3 ret - // 00000030 4889F7 mov rdi,rsi <--- cold block - // 00000033 81C734120000 add edi,0x1234 - // 00000039 85FF test edi,edi - // 0000003B 0F85EFFFFFFF jnz near 0x30 - // 00000041 E9D2FFFFFF jmp 0x18 + // 00000004 81C734120000 add edi,0x1234 + // 0000000A 85FF test edi,edi + // 0000000C 0F841C000000 jz near 0x2e + // 00000012 4989F8 mov r8,rdi + // 00000015 4889F8 mov rax,rdi + // 00000018 81E834120000 sub eax,0x1234 + // 0000001E 4401C0 add eax,r8d + // 00000021 85FF test edi,edi + // 00000023 0F8505000000 jnz near 0x2e + // 00000029 4889EC mov rsp,rbp + // 0000002C 5D pop rbp + // 0000002D C3 ret + // 0000002E 4989F8 mov r8,rdi + // 00000031 4181C034120000 add r8d,0x1234 + // 00000038 4585C0 test r8d,r8d + // 0000003B 0F85EDFFFFFF jnz near 0x2e + // 00000041 E9CFFFFFFF jmp 0x15 let golden = vec![ - 85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 27, 0, 0, 0, - 72, 137, 247, 72, 137, 240, 129, 232, 52, 18, 0, 0, 1, 248, 133, 246, 15, 133, 5, 0, 0, - 0, 72, 137, 236, 93, 195, 72, 137, 247, 129, 199, 52, 18, 0, 0, 133, 255, 15, 133, 239, - 255, 255, 255, 233, 210, 255, 255, 255, + 85, 72, 137, 229, 129, 199, 52, 18, 0, 0, 133, 255, 15, 132, 28, 0, 0, 0, 73, 137, 248, + 72, 137, 248, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 255, 15, 133, 5, 0, 0, 0, 72, + 137, 236, 93, 195, 73, 137, 248, 65, 129, 192, 52, 18, 0, 0, 69, 133, 192, 15, 133, + 237, 255, 255, 255, 233, 207, 255, 255, 255, ]; 
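// The golden bytes above differ from the previous expectation only in
// register choice: regalloc2 picks rdi/r8 where regalloc.rs picked rsi/rdi.
// The code shape is unchanged, including the cold block that is still placed
// at the end of the function (at 0x2e here, 0x30 before).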
assert_eq!(code, &golden[..]); diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 3d5ca20035..2cd498f9c1 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -101,7 +101,6 @@ mod inst_predicates; mod iterators; mod legalizer; mod licm; -mod log; mod nan_canonicalization; mod remove_constant_phis; mod result; diff --git a/cranelift/codegen/src/log.rs b/cranelift/codegen/src/log.rs deleted file mode 100644 index c5bd59aa58..0000000000 --- a/cranelift/codegen/src/log.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! This module implements deferred display helpers. -//! -//! These are particularly useful in logging contexts, where the maximum logging level filter might -//! be enabled, but we don't want the arguments to be evaluated early: -//! -//! ``` -//! log::set_max_level(log::LevelFilter::max()); -//! fn expensive_calculation() -> String { -//! "a string that is very slow to generate".into() -//! } -//! log::debug!("{}", expensive_calculation()); -//! ``` -//! -//! If the associated log implementation filters out log debug entries, the expensive calculation -//! would have been spurious. In this case, we can wrap the expensive computation within an -//! `DeferredDisplay`, so that the computation only happens when the actual `fmt` function is -//! called. - -use core::fmt; - -pub(crate) struct DeferredDisplay(F); - -impl T, T: fmt::Display> DeferredDisplay { - pub(crate) fn new(f: F) -> Self { - Self(f) - } -} - -impl T, T: fmt::Display> fmt::Display for DeferredDisplay { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0().fmt(f) - } -} - -impl T, T: fmt::Debug> fmt::Debug for DeferredDisplay { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0().fmt(f) - } -} diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index d32993d1ce..4d7442b670 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -5,7 +5,6 @@ use crate::ir::{Signature, StackSlot}; use crate::isa::CallConv; use crate::machinst::*; use crate::settings; -use regalloc::{Reg, Set, SpillSlot, Writable}; use smallvec::SmallVec; /// A small vector of instructions (with some reasonable size); appropriate for @@ -42,12 +41,6 @@ pub trait ABICallee { /// Get the calling convention implemented by this ABI object. fn call_conv(&self) -> CallConv; - /// Get the liveins of the function. - fn liveins(&self) -> Set; - - /// Get the liveouts of the function. - fn liveouts(&self) -> Set; - /// Number of arguments. fn num_args(&self) -> usize; @@ -106,7 +99,7 @@ pub trait ABICallee { fn set_num_spillslots(&mut self, slots: usize); /// Update with the clobbered registers, post-regalloc. - fn set_clobbered(&mut self, clobbered: Set>); + fn set_clobbered(&mut self, clobbered: Vec>); /// Get the address of a stackslot. 
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I; diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 065ee7074c..58648ad9f6 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -125,6 +125,7 @@ use super::abi::*; use crate::binemit::StackMap; +use crate::fx::FxHashSet; use crate::ir::types::*; use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot}; use crate::machinst::*; @@ -132,7 +133,6 @@ use crate::settings; use crate::CodegenResult; use crate::{ir, isa}; use alloc::vec::Vec; -use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; use std::marker::PhantomData; @@ -257,16 +257,6 @@ pub enum ArgsOrRets { Rets, } -/// Is an instruction returned by an ABI machine-specific backend a safepoint, -/// or not? -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum InstIsSafepoint { - /// The instruction is a safepoint. - Yes, - /// The instruction is not a safepoint. - No, -} - /// Abstract location for a machine-specific ABI impl to translate into the /// appropriate addressing mode. #[derive(Clone, Copy, Debug)] @@ -319,11 +309,7 @@ pub trait ABIMachineSpec { /// Returns word register class. fn word_reg_class() -> RegClass { - match Self::word_bits() { - 32 => RegClass::I32, - 64 => RegClass::I64, - _ => unreachable!(), - } + RegClass::Int } /// Returns required stack alignment in bytes. @@ -366,7 +352,7 @@ pub trait ABIMachineSpec { ) -> Self::I; /// Generate a return instruction. - fn gen_ret() -> Self::I; + fn gen_ret(rets: Vec) -> Self::I; /// Generate an "epilogue placeholder" instruction, recognized by lowering /// when using the Baldrdash ABI. @@ -442,7 +428,7 @@ pub trait ABIMachineSpec { /// contains the registers in a sorted order. fn get_clobbered_callee_saves( call_conv: isa::CallConv, - regs: &Set>, + regs: &[Writable], ) -> Vec>; /// Determine whether it is necessary to generate the usual frame-setup @@ -466,7 +452,7 @@ pub trait ABIMachineSpec { call_conv: isa::CallConv, setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec>, + clobbered_callee_saves: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>); @@ -478,7 +464,7 @@ pub trait ABIMachineSpec { fn gen_clobber_restore( call_conv: isa::CallConv, flags: &settings::Flags, - clobbers: &Set>, + clobbers: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]>; @@ -493,7 +479,7 @@ pub trait ABIMachineSpec { tmp: Writable, callee_conv: isa::CallConv, callee_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>; + ) -> SmallVec<[Self::I; 2]>; /// Generate a memcpy invocation. Used to set up struct args. May clobber /// caller-save registers; we only memcpy before we start to set up args for @@ -530,6 +516,7 @@ pub trait ABIMachineSpec { } /// ABI information shared between body (callee) and caller. +#[derive(Clone)] struct ABISig { /// Argument locations (regs or stack slots). Stack offsets are relative to /// SP on entry to function. @@ -604,7 +591,7 @@ pub struct ABICalleeImpl { /// Stack size to be reserved for outgoing arguments. outgoing_args_size: u32, /// Clobbered registers, from regalloc. - clobbered: Set>, + clobbered: Vec>, /// Total number of spillslots, from regalloc. spillslots: Option, /// Storage allocated for the fixed part of the stack frame. 
This is @@ -655,24 +642,13 @@ fn get_special_purpose_param_register( let idx = f.signature.special_param_index(purpose)?; match &abi.args[idx] { &ABIArg::Slots { ref slots, .. } => match &slots[0] { - &ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()), + &ABIArgSlot::Reg { reg, .. } => Some(reg.into()), _ => None, }, _ => None, } } -fn ty_from_class(class: RegClass) -> Type { - match class { - RegClass::I32 => I32, - RegClass::I64 => I64, - RegClass::F32 => F32, - RegClass::F64 => F64, - RegClass::V128 => I8X16, - _ => panic!("Unknown regclass: {:?}", class), - } -} - impl ABICalleeImpl { /// Create a new body ABI instance. pub fn new( @@ -739,7 +715,7 @@ impl ABICalleeImpl { stackslots, stackslots_size: stack_offset, outgoing_args_size: 0, - clobbered: Set::empty(), + clobbered: vec![], spillslots: None, fixed_frame_storage_size: 0, total_frame_size: None, @@ -961,34 +937,6 @@ impl ABICallee for ABICalleeImpl { self.sig.call_conv } - fn liveins(&self) -> Set { - let mut set: Set = Set::empty(); - for arg in &self.sig.args { - if let &ABIArg::Slots { ref slots, .. } = arg { - for slot in slots { - if let ABIArgSlot::Reg { reg, .. } = slot { - set.insert(*reg); - } - } - } - } - set - } - - fn liveouts(&self) -> Set { - let mut set: Set = Set::empty(); - for ret in &self.sig.rets { - if let &ABIArg::Slots { ref slots, .. } = ret { - for slot in slots { - if let ABIArgSlot::Reg { reg, .. } = slot { - set.insert(*reg); - } - } - } - } - set - } - fn num_args(&self) -> usize { self.sig.args.len() } @@ -1019,7 +967,7 @@ impl ABICallee for ABICalleeImpl { // Extension mode doesn't matter (we're copying out, not in; we // ignore high bits by convention). &ABIArgSlot::Reg { reg, ty, .. } => { - insts.push(M::gen_move(*into_reg, reg.to_reg(), ty)); + insts.push(M::gen_move(*into_reg, reg.into(), ty)); } &ABIArgSlot::Stack { offset, ty, .. } => { insts.push(M::gen_load_stack( @@ -1069,20 +1017,21 @@ impl ABICallee for ABICalleeImpl { match &self.sig.rets[idx] { &ABIArg::Slots { ref slots, .. } => { assert_eq!(from_regs.len(), slots.len()); - for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) { + for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) { match slot { &ABIArgSlot::Reg { reg, ty, extension, .. } => { let from_bits = ty_bits(ty) as u8; let ext = M::get_ext_mode(self.sig.call_conv, extension); + let reg: Writable = Writable::from_reg(Reg::from(reg)); match (ext, from_bits) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < word_bits => { let signed = ext == ArgumentExtension::Sext; ret.push(M::gen_extend( - Writable::from_reg(reg.to_reg()), + reg, from_reg.to_reg(), signed, from_bits, @@ -1090,11 +1039,7 @@ impl ABICallee for ABICalleeImpl { )); } _ => { - ret.push(M::gen_move( - Writable::from_reg(reg.to_reg()), - from_reg.to_reg(), - ty, - )); + ret.push(M::gen_move(reg, from_reg.to_reg(), ty)); } }; } @@ -1118,7 +1063,7 @@ impl ABICallee for ABICalleeImpl { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < word_bits => { - assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class()); + assert_eq!(M::word_reg_class(), from_reg.to_reg().class()); let signed = ext == ArgumentExtension::Sext; ret.push(M::gen_extend( Writable::from_reg(from_reg.to_reg()), @@ -1166,7 +1111,22 @@ impl ABICallee for ABICalleeImpl { } fn gen_ret(&self) -> Self::I { - M::gen_ret() + let mut rets = vec![]; + for ret in &self.sig.rets { + match ret { + ABIArg::Slots { slots, .. } => { + for slot in slots { + match slot { + ABIArgSlot::Reg { reg, .. 
} => rets.push(Reg::from(*reg)), + _ => {} + } + } + } + _ => {} + } + } + + M::gen_ret(rets) } fn gen_epilogue_placeholder(&self) -> Self::I { @@ -1177,7 +1137,7 @@ impl ABICallee for ABICalleeImpl { self.spillslots = Some(slots); } - fn set_clobbered(&mut self, clobbered: Set>) { + fn set_clobbered(&mut self, clobbered: Vec>) { self.clobbered = clobbered; } @@ -1198,7 +1158,7 @@ impl ABICallee for ABICalleeImpl { into_regs: ValueRegs>, ) -> SmallInstVec { // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size. - let islot = slot.get() as i64; + let islot = slot.index() as i64; let spill_off = islot * M::word_bytes() as i64; let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); @@ -1214,7 +1174,7 @@ impl ABICallee for ABICalleeImpl { from_regs: ValueRegs, ) -> SmallInstVec { // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size. - let islot = slot.get() as i64; + let islot = slot.index() as i64; let spill_off = islot * M::word_bytes() as i64; let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); @@ -1245,7 +1205,7 @@ impl ABICallee for ABICalleeImpl { let first_spillslot_word = ((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize; for &slot in slots { - let slot = slot.get() as usize; + let slot = slot.index(); bits[first_spillslot_word + slot] = true; } @@ -1347,7 +1307,10 @@ impl ABICallee for ABICalleeImpl { insts.extend(M::gen_epilogue_frame_restore(&self.flags)); } - insts.push(M::gen_ret()); + // This `ret` doesn't need any return registers attached + // because we are post-regalloc and don't need to + // represent the implicit uses anymore. + insts.push(M::gen_ret(vec![])); } log::trace!("Epilogue: {:?}", insts); @@ -1368,19 +1331,19 @@ impl ABICallee for ABICalleeImpl { } fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I { - let ty = ty_from_class(from_reg.to_reg().get_class()); - self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg())) + let ty = Self::I::canonical_type_for_rc(Reg::from(from_reg).class()); + self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg))) .into_iter() .next() .unwrap() } fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot) -> Self::I { - let ty = ty_from_class(to_reg.to_reg().get_class()); + let ty = Self::I::canonical_type_for_rc(to_reg.to_reg().class()); self.load_spillslot( from_slot, ty, - writable_value_regs(ValueRegs::one(to_reg.to_reg().to_reg())), + writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))), ) .into_iter() .next() @@ -1390,13 +1353,13 @@ impl ABICallee for ABICalleeImpl { fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { // Compute uses: all arg regs. - let mut uses = Vec::new(); + let mut uses = FxHashSet::default(); for arg in &sig.args { if let &ABIArg::Slots { ref slots, .. } = arg { for slot in slots { match slot { &ABIArgSlot::Reg { reg, .. } => { - uses.push(reg.to_reg()); + uses.insert(Reg::from(reg)); } _ => {} } @@ -1405,13 +1368,15 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec = M::get_regs_clobbered_by_call(sig.call_conv) + .into_iter() + .collect(); for ret in &sig.rets { if let &ABIArg::Slots { ref slots, .. } = ret { for slot in slots { match slot { &ABIArgSlot::Reg { reg, .. 
} => { - defs.push(Writable::from_reg(reg.to_reg())); + defs.insert(Writable::from_reg(Reg::from(reg))); } _ => {} } @@ -1419,6 +1384,11 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>(); + let mut defs = defs.into_iter().collect::>(); + uses.sort_unstable(); + defs.sort_unstable(); + (uses, defs) } @@ -1567,14 +1537,14 @@ impl ABICaller for ABICallerImpl { } => { let ext = M::get_ext_mode(self.sig.call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - assert_eq!(word_rc, reg.get_class()); + assert_eq!(word_rc, reg.class()); let signed = match ext { ir::ArgumentExtension::Uext => false, ir::ArgumentExtension::Sext => true, _ => unreachable!(), }; ctx.emit(M::gen_extend( - Writable::from_reg(reg.to_reg()), + Writable::from_reg(Reg::from(reg)), *from_reg, signed, ty_bits(ty) as u8, @@ -1582,7 +1552,7 @@ impl ABICaller for ABICallerImpl { )); } else { ctx.emit(M::gen_move( - Writable::from_reg(reg.to_reg()), + Writable::from_reg(Reg::from(reg)), *from_reg, ty, )); @@ -1597,7 +1567,7 @@ impl ABICaller for ABICallerImpl { let mut ty = ty; let ext = M::get_ext_mode(self.sig.call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - assert_eq!(word_rc, from_reg.get_class()); + assert_eq!(word_rc, from_reg.class()); let signed = match ext { ir::ArgumentExtension::Uext => false, ir::ArgumentExtension::Sext => true, @@ -1680,7 +1650,7 @@ impl ABICaller for ABICallerImpl { // Extension mode doesn't matter because we're copying out, not in, // and we ignore high bits in our own registers by convention. &ABIArgSlot::Reg { reg, ty, .. } => { - ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty)); + ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty)); } &ABIArgSlot::Stack { offset, ty, .. } => { let ret_area_base = self.sig.stack_arg_space; @@ -1716,7 +1686,7 @@ impl ABICaller for ABICallerImpl { self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg())); } let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap(); - for (is_safepoint, inst) in M::gen_call( + for inst in M::gen_call( &self.dest, uses, defs, @@ -1727,10 +1697,7 @@ impl ABICaller for ABICallerImpl { ) .into_iter() { - match is_safepoint { - InstIsSafepoint::Yes => ctx.emit_safepoint(inst), - InstIsSafepoint::No => ctx.emit(inst), - } + ctx.emit(inst); } } } diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index 5c805c4029..718abcd229 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -127,6 +127,9 @@ pub enum LoweredBlock { /// to the next, i.e., corresponding to the included edge-block. This /// will be an instruction in `block`. edge_inst: Inst, + /// The successor index in this edge, to distinguish multiple + /// edges between the same block pair. + succ_idx: usize, /// The successor CLIF block. succ: Block, }, @@ -138,6 +141,9 @@ pub enum LoweredBlock { /// The edge (jump) instruction corresponding to the included /// edge-block. This will be an instruction in `pred`. edge_inst: Inst, + /// The successor index in this edge, to distinguish multiple + /// edges between the same block pair. + succ_idx: usize, /// The original CLIF block included in this lowered block. block: Block, }, @@ -150,6 +156,9 @@ pub enum LoweredBlock { /// The edge (jump) instruction corresponding to this edge's transition. /// This will be an instruction in `pred`. 
edge_inst: Inst, + /// The successor index in this edge, to distinguish multiple + /// edges between the same block pair. + succ_idx: usize, /// The successor CLIF block. succ: Block, }, @@ -168,29 +177,34 @@ impl LoweredBlock { } /// The associated in-edge, if any. + #[cfg(test)] pub fn in_edge(self) -> Option<(Block, Inst, Block)> { match self { LoweredBlock::EdgeAndOrig { pred, edge_inst, block, + .. } => Some((pred, edge_inst, block)), _ => None, } } /// the associated out-edge, if any. Also includes edge-only blocks. + #[cfg(test)] pub fn out_edge(self) -> Option<(Block, Inst, Block)> { match self { LoweredBlock::OrigAndEdge { block, edge_inst, succ, + .. } => Some((block, edge_inst, succ)), LoweredBlock::Edge { pred, edge_inst, succ, + .. } => Some((pred, edge_inst, succ)), _ => None, } @@ -207,15 +221,17 @@ impl BlockLoweringOrder { let mut block_out_count = SecondaryMap::with_default(0); // Cache the block successors to avoid re-examining branches below. - let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new(); + let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new(); let mut block_succ_range = SecondaryMap::with_default((0, 0)); let mut fallthrough_return_block = None; for block in f.layout.blocks() { let block_succ_start = block_succs.len(); + let mut succ_idx = 0; visit_block_succs(f, block, |inst, succ| { block_out_count[block] += 1; block_in_count[succ] += 1; - block_succs.push((inst, succ)); + block_succs.push((inst, succ_idx, succ)); + succ_idx += 1; }); let block_succ_end = block_succs.len(); block_succ_range[block] = (block_succ_start, block_succ_end); @@ -262,13 +278,14 @@ impl BlockLoweringOrder { // At an orig block; successors are always edge blocks, // possibly with orig blocks following. let range = block_succ_range[block]; - for &(edge_inst, succ) in &block_succs[range.0..range.1] { + for &(edge_inst, succ_idx, succ) in &block_succs[range.0..range.1] { if block_in_count[succ] == 1 { ret.push(( edge_inst, LoweredBlock::EdgeAndOrig { pred: block, edge_inst, + succ_idx, block: succ, }, )); @@ -278,6 +295,7 @@ impl BlockLoweringOrder { LoweredBlock::Edge { pred: block, edge_inst, + succ_idx, succ, }, )); @@ -298,12 +316,13 @@ impl BlockLoweringOrder { // implicit return succ). if range.1 - range.0 > 0 { debug_assert!(range.1 - range.0 == 1); - let (succ_edge_inst, succ_succ) = block_succs[range.0]; + let (succ_edge_inst, succ_succ_idx, succ_succ) = block_succs[range.0]; ret.push(( edge_inst, LoweredBlock::OrigAndEdge { block: succ, edge_inst: succ_edge_inst, + succ_idx: succ_succ_idx, succ: succ_succ, }, )); @@ -395,7 +414,7 @@ impl BlockLoweringOrder { let mut lowered_succ_ranges = vec![]; let mut lb_to_bindex = FxHashMap::default(); for (block, succ_range) in rpo.into_iter() { - let index = lowered_order.len() as BlockIndex; + let index = BlockIndex::new(lowered_order.len()); lb_to_bindex.insert(block, index); lowered_order.push(block); lowered_succ_ranges.push(succ_range); @@ -416,7 +435,7 @@ impl BlockLoweringOrder { let mut orig_map = SecondaryMap::with_default(None); for (i, lb) in lowered_order.iter().enumerate() { - let i = i as BlockIndex; + let i = BlockIndex::new(i); if let Some(b) = lb.orig_block() { orig_map[b] = Some(i); } @@ -441,7 +460,7 @@ impl BlockLoweringOrder { /// Get the successor indices for a lowered block. 
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] { - let range = self.lowered_succ_ranges[block as usize]; + let range = self.lowered_succ_ranges[block.index()]; &self.lowered_succ_indices[range.0..range.1] } diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index ffc96c1abb..7802b44e72 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -269,7 +269,7 @@ impl MachLabel { /// Get a label for a block. (The first N MachLabels are always reserved for /// the N blocks in the vcode.) pub fn from_block(bindex: BlockIndex) -> MachLabel { - MachLabel(bindex) + MachLabel(bindex.index() as u32) } /// Get the numeric label index. @@ -334,7 +334,7 @@ impl<I: VCodeInst> MachBuffer<I> { /// times, e.g. after calling `add_{cond,uncond}_branch()` and /// before emitting branch bytes. fn check_label_branch_invariants(&self) { - if !cfg!(debug_assertions) || cfg!(fuzzing) { + if !cfg!(fuzzing) { return; } let cur_off = self.cur_offset(); @@ -489,12 +489,11 @@ impl<I: VCodeInst> MachBuffer<I> { } /// Reserve the first N MachLabels for blocks. - pub fn reserve_labels_for_blocks(&mut self, blocks: BlockIndex) { + pub fn reserve_labels_for_blocks(&mut self, blocks: usize) { trace!("MachBuffer: first {} labels are for blocks", blocks); debug_assert!(self.label_offsets.is_empty()); - self.label_offsets - .resize(blocks as usize, UNKNOWN_LABEL_OFFSET); - self.label_aliases.resize(blocks as usize, UNKNOWN_LABEL); + self.label_offsets.resize(blocks, UNKNOWN_LABEL_OFFSET); + self.label_aliases.resize(blocks, UNKNOWN_LABEL); // Post-invariant: as for `get_label()`. } @@ -1599,14 +1598,14 @@ impl MachBranch { /// resolving labels internally in the buffer. pub struct MachTextSectionBuilder<I: VCodeInst> { buf: MachBuffer<I>, - next_func: u32, + next_func: usize, force_veneers: bool, } impl<I: VCodeInst> MachTextSectionBuilder<I> { pub fn new(num_funcs: u32) -> MachTextSectionBuilder<I> { let mut buf = MachBuffer::new(); - buf.reserve_labels_for_blocks(num_funcs); + buf.reserve_labels_for_blocks(num_funcs as usize); MachTextSectionBuilder { buf, next_func: 0, @@ -1627,7 +1626,8 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> { self.buf.align_to(align.unwrap_or(I::LabelUse::ALIGN)); let pos = self.buf.cur_offset(); if named { - self.buf.bind_label(MachLabel::from_block(self.next_func)); + self.buf + .bind_label(MachLabel::from_block(BlockIndex::new(self.next_func))); self.next_func += 1; } self.buf.put_data(func); @@ -1635,7 +1635,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> { } fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: u32) -> bool { - let label = MachLabel::from_block(target); + let label = MachLabel::from_block(BlockIndex::new(target as usize)); let offset = u32::try_from(offset).unwrap(); match I::LabelUse::from_reloc(reloc, addend) { Some(label_use) => { @@ -1652,7 +1652,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> { fn finish(&mut self) -> Vec<u8> { // Double-check all functions were pushed. - assert_eq!(self.next_func, self.buf.label_offsets.len() as u32); + assert_eq!(self.next_func, self.buf.label_offsets.len()); // Finish up any veneers, if necessary.
self.buf @@ -1675,7 +1675,7 @@ mod test { use std::vec::Vec; fn label(n: u32) -> MachLabel { - MachLabel::from_block(n) + MachLabel::from_block(BlockIndex::new(n as usize)) } fn target(n: u32) -> BranchTarget { BranchTarget::Label(label(n)) @@ -1690,7 +1690,7 @@ mod test { buf.reserve_labels_for_blocks(2); buf.bind_label(label(0)); let inst = Inst::Jump { dest: target(1) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let buf = buf.finish(); assert_eq!(0, buf.total_size()); @@ -1710,15 +1710,15 @@ mod test { taken: target(1), not_taken: target(2), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); @@ -1740,17 +1740,17 @@ mod test { taken: target(1), not_taken: target(2), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Udf { trap_code: TrapCode::Interrupt, }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); @@ -1762,9 +1762,9 @@ mod test { kind: CondBrKind::NotZero(xreg(0)), trap_code: TrapCode::Interrupt, }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let inst = Inst::Nop4; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let buf2 = buf2.finish(); @@ -1785,7 +1785,7 @@ mod test { taken: target(2), not_taken: target(3), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); while buf.cur_offset() < 2000000 { @@ -1793,16 +1793,16 @@ mod test { buf.emit_island(0); } let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); } buf.bind_label(label(2)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); @@ -1831,7 +1831,7 @@ mod test { // go directly to the target. 
not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4), }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let buf2 = buf2.finish(); @@ -1848,16 +1848,16 @@ mod test { buf.bind_label(label(0)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); while buf.cur_offset() < 2000000 { let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); } buf.bind_label(label(3)); @@ -1866,7 +1866,7 @@ mod test { taken: target(0), not_taken: target(1), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); @@ -1879,11 +1879,11 @@ mod test { taken: BranchTarget::ResolvedOffset(8), not_taken: BranchTarget::ResolvedOffset(4 - (2000000 + 4)), }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(-(2000000 + 8)), }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let buf2 = buf2.finish(); @@ -1937,38 +1937,38 @@ mod test { taken: target(1), not_taken: target(2), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let inst = Inst::Jump { dest: target(0) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); let inst = Inst::Jump { dest: target(4) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(4)); let inst = Inst::Jump { dest: target(5) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(5)); let inst = Inst::Jump { dest: target(7) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(6)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(7)); - let inst = Inst::Ret; - inst.emit(&mut buf, &info, &mut state); + let inst = Inst::Ret { rets: vec![] }; + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); @@ -2009,23 +2009,23 @@ mod test { buf.bind_label(label(0)); let inst = Inst::Jump { dest: target(1) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Jump { dest: target(2) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); let inst = Inst::Jump { dest: target(4) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(4)); let inst = Inst::Jump { dest: target(1) }; - 
inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 67593d940a..1bdfe24b52 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -2,12 +2,11 @@ use crate::ir::Function; use crate::isa::TargetIsa; -use crate::log::DeferredDisplay; use crate::machinst::*; -use crate::settings; use crate::timing; -use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint}; +use regalloc2::RegallocOptions; +use regalloc2::{self, MachineEnv}; /// Compile the given function down to VCode with allocated registers, ready /// for binary emission. @@ -15,100 +14,38 @@ pub fn compile( f: &Function, b: &B, abi: Box>, - reg_universe: &RealRegUniverse, + machine_env: &MachineEnv, emit_info: ::Info, -) -> CodegenResult> -where - B::MInst: PrettyPrint, -{ +) -> CodegenResult<(VCode, regalloc2::Output)> { // Compute lowered block order. let block_order = BlockLoweringOrder::new(f); // Build the lowering context. let lower = Lower::new(f, abi, emit_info, block_order)?; // Lower the IR. - let (mut vcode, stack_map_request_info) = { + let vcode = { let _tt = timing::vcode_lower(); lower.lower(b)? }; - // Creating the vcode string representation may be costly for large functions, so defer its - // rendering. - log::trace!( - "vcode from lowering: \n{}", - DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe))) - ); + log::trace!("vcode from lowering: \n{:?}", vcode); // Perform register allocation. - let (run_checker, algorithm) = match vcode.flags().regalloc() { - settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())), - settings::Regalloc::BacktrackingChecked => { - (true, Algorithm::Backtracking(Default::default())) - } - settings::Regalloc::ExperimentalLinearScan => { - (false, Algorithm::LinearScan(Default::default())) - } - settings::Regalloc::ExperimentalLinearScanChecked => { - (true, Algorithm::LinearScan(Default::default())) - } - }; - - #[cfg(feature = "regalloc-snapshot")] - { - use std::fs; - use std::path::Path; - if let Some(path) = std::env::var("SERIALIZE_REGALLOC").ok() { - let snapshot = regalloc::IRSnapshot::from_function(&vcode, reg_universe); - let serialized = bincode::serialize(&snapshot).expect("couldn't serialize snapshot"); - - let file_path = Path::new(&path).join(Path::new(&format!("ir{}.bin", f.name))); - fs::write(file_path, &serialized).expect("couldn't write IR snapshot file"); - } - } - - // If either there are no reference-typed values, or else there are - // but there are no safepoints at which we need to know about them, - // then we don't need stack maps. 
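// The algorithm selection, snapshotting, and stack-map request plumbing
// deleted in this hunk all collapse into one call; a sketch of the
// replacement as it appears below (error logging elided):
//
//     let mut options = RegallocOptions::default();
//     options.verbose_log = log::log_enabled!(log::Level::Trace);
//     let regalloc_result = regalloc2::run(&vcode, machine_env, &options)
//         .expect("register allocation");
//
// No separate stack-map request step remains: regalloc2 computes safepoint
// information from reference-typed vregs on its own.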
- let sri = if stack_map_request_info.reftyped_vregs.len() > 0 - && stack_map_request_info.safepoint_insns.len() > 0 - { - Some(&stack_map_request_info) - } else { - None - }; - - let result = { + let regalloc_result = { let _tt = timing::regalloc(); - allocate_registers_with_opts( - &mut vcode, - reg_universe, - sri, - Options { - run_checker, - algorithm, - }, - ) - .map_err(|err| { - log::error!( - "Register allocation error for vcode\n{}\nError: {:?}", - vcode.show_rru(Some(reg_universe)), + let mut options = RegallocOptions::default(); + options.verbose_log = log::log_enabled!(log::Level::Trace); + regalloc2::run(&vcode, machine_env, &options) + .map_err(|err| { + log::error!( + "Register allocation error for vcode\n{:?}\nError: {:?}\nCLIF for error:\n{:?}", + vcode, + err, + f, + ); err - ); - err - }) - .expect("register allocation") + }) + .expect("register allocation") }; - // Reorder vcode into final order and copy out final instruction sequence - // all at once. This also inserts prologues/epilogues. - { - let _tt = timing::vcode_post_ra(); - vcode.replace_insns_from_regalloc(result); - } - - log::trace!( - "vcode after regalloc: final version:\n{}", - DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe))) - ); - - Ok(vcode) + Ok((vcode, regalloc_result)) } diff --git a/cranelift/codegen/src/machinst/debug.rs b/cranelift/codegen/src/machinst/debug.rs deleted file mode 100644 index e2eafe50b4..0000000000 --- a/cranelift/codegen/src/machinst/debug.rs +++ /dev/null @@ -1,525 +0,0 @@ -//! Debug info analysis: computes value-label ranges from value-label markers in -//! generated VCode. -//! -//! We "reverse-engineer" debug info like this because it is far more reliable -//! than generating it while emitting code and keeping it in sync. -//! -//! This works by (i) observing "value-label marker" instructions, which are -//! semantically just an assignment from a register to a "value label" (which -//! one can think of as another register; they represent, e.g., Wasm locals) at -//! a certain point in the code, and (ii) observing loads and stores to the -//! stack and register moves. -//! -//! We track, at every program point, the correspondence between each value -//! label and *all* locations in which it resides. E.g., if it is stored to the -//! stack, we remember that it is in both a register and the stack slot; but if -//! the register is later overwritten, then we have it just in the stack slot. -//! This allows us to avoid false-positives observing loads/stores that we think -//! are spillslots but really aren't. -//! -//! We do a standard forward dataflow analysis to compute this info. - -use crate::ir::ValueLabel; -use crate::machinst::*; -use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange}; -use log::trace; -use regalloc::{Reg, RegUsageCollector}; -use std::collections::{HashMap, HashSet}; -use std::hash::Hash; - -/// Location of a labeled value: in a register or in a stack slot. Note that a -/// value may live in more than one location; `AnalysisInfo` maps each -/// value-label to multiple `ValueLoc`s. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -enum ValueLoc { - Reg(Reg), - /// Nominal-SP offset. 
- Stack(i64), -} - -impl From for LabelValueLoc { - fn from(v: ValueLoc) -> Self { - match v { - ValueLoc::Reg(r) => LabelValueLoc::Reg(r), - ValueLoc::Stack(off) => LabelValueLoc::SPOffset(off), - } - } -} - -impl ValueLoc { - fn is_reg(self) -> bool { - match self { - ValueLoc::Reg(_) => true, - _ => false, - } - } - fn is_stack(self) -> bool { - match self { - ValueLoc::Stack(_) => true, - _ => false, - } - } -} - -/// Mappings at one program point. -#[derive(Clone, Debug)] -struct AnalysisInfo { - /// Nominal SP relative to real SP. If `None`, then the offset is - /// indeterminate (i.e., we merged to the lattice 'bottom' element). This - /// should not happen in well-formed code. - nominal_sp_offset: Option, - /// Forward map from labeled values to sets of locations. - label_to_locs: HashMap>, - /// Reverse map for each register indicating the value it holds, if any. - reg_to_label: HashMap, - /// Reverse map for each stack offset indicating the value it holds, if any. - stack_to_label: HashMap, -} - -/// Get the registers written (mod'd or def'd) by a machine instruction. -fn get_inst_writes(m: &M) -> Vec { - // TODO: expose this part of regalloc.rs's interface publicly. - let mut vecs = RegUsageCollector::get_empty_reg_vecs_test_framework_only(false); - let mut coll = RegUsageCollector::new(&mut vecs); - m.get_regs(&mut coll); - vecs.defs.extend(vecs.mods.into_iter()); - vecs.defs -} - -impl AnalysisInfo { - /// Create a new analysis state. This is the "top" lattice element at which - /// the fixpoint dataflow analysis starts. - fn new() -> Self { - AnalysisInfo { - nominal_sp_offset: Some(0), - label_to_locs: HashMap::new(), - reg_to_label: HashMap::new(), - stack_to_label: HashMap::new(), - } - } - - /// Remove all locations for a given labeled value. Used when the labeled - /// value is redefined (so old values become stale). - fn clear_label(&mut self, label: ValueLabel) { - if let Some(locs) = self.label_to_locs.remove(&label) { - for loc in locs { - match loc { - ValueLoc::Reg(r) => { - self.reg_to_label.remove(&r); - } - ValueLoc::Stack(off) => { - self.stack_to_label.remove(&off); - } - } - } - } - } - - /// Remove a label from a register, if any. Used, e.g., if the register is - /// overwritten. - fn clear_reg(&mut self, reg: Reg) { - if let Some(label) = self.reg_to_label.remove(®) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.remove(&ValueLoc::Reg(reg)); - } - } - } - - /// Remove a label from a stack offset, if any. Used, e.g., when the stack - /// slot is overwritten. - fn clear_stack_off(&mut self, off: i64) { - if let Some(label) = self.stack_to_label.remove(&off) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.remove(&ValueLoc::Stack(off)); - } - } - } - - /// Indicate that a labeled value is newly defined and its new value is in - /// `reg`. - fn def_label_at_reg(&mut self, label: ValueLabel, reg: Reg) { - self.clear_label(label); - self.label_to_locs - .entry(label) - .or_insert_with(|| HashSet::new()) - .insert(ValueLoc::Reg(reg)); - self.reg_to_label.insert(reg, label); - } - - /// Process a store from a register to a stack slot (offset). - fn store_reg(&mut self, reg: Reg, off: i64) { - self.clear_stack_off(off); - if let Some(label) = self.reg_to_label.get(®) { - if let Some(locs) = self.label_to_locs.get_mut(label) { - locs.insert(ValueLoc::Stack(off)); - } - self.stack_to_label.insert(off, *label); - } - } - - /// Process a load from a stack slot (offset) to a register. 
- fn load_reg(&mut self, reg: Reg, off: i64) { - self.clear_reg(reg); - if let Some(&label) = self.stack_to_label.get(&off) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.insert(ValueLoc::Reg(reg)); - } - self.reg_to_label.insert(reg, label); - } - } - - /// Process a move from one register to another. - fn move_reg(&mut self, to: Reg, from: Reg) { - self.clear_reg(to); - if let Some(&label) = self.reg_to_label.get(&from) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.insert(ValueLoc::Reg(to)); - } - self.reg_to_label.insert(to, label); - } - } - - /// Update the analysis state w.r.t. an instruction's effects. Given the - /// state just before `inst`, this method updates `self` to be the state - /// just after `inst`. - fn step(&mut self, inst: &M) { - for write in get_inst_writes(inst) { - self.clear_reg(write); - } - if let Some((label, reg)) = inst.defines_value_label() { - self.def_label_at_reg(label, reg); - } - match inst.stack_op_info() { - Some(MachInstStackOpInfo::LoadNomSPOff(reg, offset)) => { - self.load_reg(reg, offset + self.nominal_sp_offset.unwrap()); - } - Some(MachInstStackOpInfo::StoreNomSPOff(reg, offset)) => { - self.store_reg(reg, offset + self.nominal_sp_offset.unwrap()); - } - Some(MachInstStackOpInfo::NomSPAdj(offset)) => { - if self.nominal_sp_offset.is_some() { - self.nominal_sp_offset = Some(self.nominal_sp_offset.unwrap() + offset); - } - } - _ => {} - } - if let Some((to, from)) = inst.is_move() { - let to = to.to_reg(); - self.move_reg(to, from); - } - } -} - -/// Trait used to implement the dataflow analysis' meet (intersect) function -/// onthe `AnalysisInfo` components. For efficiency, this is implemented as a -/// mutation on the LHS, rather than a pure functional operation. -trait IntersectFrom { - fn intersect_from(&mut self, other: &Self) -> IntersectResult; -} - -/// Result of an intersection operation. Indicates whether the mutated LHS -/// (which becomes the intersection result) differs from the original LHS. Also -/// indicates if the value has become "empty" and should be removed from a -/// parent container, if any. -struct IntersectResult { - /// Did the intersection change the LHS input (the one that was mutated into - /// the result)? This is needed to drive the fixpoint loop; when no more - /// changes occur, then we have converted. - changed: bool, - /// Is the resulting value "empty"? This can be used when a container, such - /// as a map, holds values of this (intersection result) type; when - /// `is_empty` is true for the merge of the values at a particular key, we - /// can remove that key from the merged (intersected) result. This is not - /// necessary for analysis correctness but reduces the memory and runtime - /// cost of the fixpoint loop. 
- is_empty: bool, -} - -impl IntersectFrom for AnalysisInfo { - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - changed |= self - .nominal_sp_offset - .intersect_from(&other.nominal_sp_offset) - .changed; - changed |= self - .label_to_locs - .intersect_from(&other.label_to_locs) - .changed; - changed |= self - .reg_to_label - .intersect_from(&other.reg_to_label) - .changed; - changed |= self - .stack_to_label - .intersect_from(&other.stack_to_label) - .changed; - IntersectResult { - changed, - is_empty: false, - } - } -} - -impl IntersectFrom for HashMap -where - K: Copy + Eq + Hash, - V: IntersectFrom, -{ - /// Intersection for hashmap: remove keys that are not in both inputs; - /// recursively intersect values for keys in common. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - let mut remove_keys = vec![]; - for k in self.keys() { - if !other.contains_key(k) { - remove_keys.push(*k); - } - } - for k in &remove_keys { - changed = true; - self.remove(k); - } - - remove_keys.clear(); - for k in other.keys() { - if let Some(v) = self.get_mut(k) { - let result = v.intersect_from(other.get(k).unwrap()); - changed |= result.changed; - if result.is_empty { - remove_keys.push(*k); - } - } - } - for k in &remove_keys { - changed = true; - self.remove(k); - } - - IntersectResult { - changed, - is_empty: self.len() == 0, - } - } -} -impl IntersectFrom for HashSet -where - T: Copy + Eq + Hash, -{ - /// Intersection for hashset: just take the set intersection. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - let mut remove = vec![]; - for val in self.iter() { - if !other.contains(val) { - remove.push(*val); - } - } - for val in remove { - changed = true; - self.remove(&val); - } - - IntersectResult { - changed, - is_empty: self.len() == 0, - } - } -} -impl IntersectFrom for ValueLabel { - // Intersection for labeled value: remove if not equal. This is equivalent - // to a three-level lattice with top, bottom, and unordered set of - // individual labels in between. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - IntersectResult { - changed: false, - is_empty: *self != *other, - } - } -} -impl IntersectFrom for Option -where - T: Copy + Eq, -{ - /// Intersectino for Option: recursively intersect if both `Some`, else - /// `None`. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - if !(self.is_some() && other.is_some() && self == other) { - changed = true; - *self = None; - } - IntersectResult { - changed, - is_empty: self.is_none(), - } - } -} - -/// Compute the value-label ranges (locations for program-point ranges for -/// labeled values) from a given `VCode` compilation result. -/// -/// In order to compute this information, we perform a dataflow analysis on the -/// machine code. To do so, and translate the results into a form usable by the -/// debug-info consumers, we need to know two additional things: -/// -/// - The machine-code layout (code offsets) of the instructions. DWARF is -/// encoded in terms of instruction *ends* (and we reason about value -/// locations at program points *after* instructions, to match this), so we -/// take an array `inst_ends`, giving us code offsets for each instruction's -/// end-point. (Note that this is one *past* the last byte; so a 4-byte -/// instruction at offset 0 has an end offset of 4.) -/// -/// - The locations of the labels to which branches will jump. 
-///   Branches can tell us about their targets in terms of `MachLabel`s, but
-///   we don't know where those `MachLabel`s will be placed in the linear
-///   array of instructions. We take the array `label_insn_index` to provide
-///   this info: for a label with index `l`, `label_insn_index[l]` is the
-///   index of the instruction before which that label is bound.
-pub(crate) fn compute<I: VCodeInst>(
-    insts: &[I],
-    layout_info: &InstsLayoutInfo,
-) -> ValueLabelsRanges {
-    let inst_start = |idx: usize| {
-        if idx == 0 {
-            0
-        } else {
-            layout_info.inst_end_offsets[idx - 1]
-        }
-    };
-
-    trace!("compute: insts =");
-    for i in 0..insts.len() {
-        trace!(
-            "  #{} end: {} -> {:?}",
-            i,
-            layout_info.inst_end_offsets[i],
-            insts[i]
-        );
-    }
-    trace!("label_insn_index: {:?}", layout_info.label_inst_indices);
-
-    // Info at each block head, indexed by label.
-    let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();
-
-    // Initialize state at entry.
-    block_starts.insert(0, AnalysisInfo::new());
-
-    // Worklist: label indices for basic blocks.
-    let mut worklist = Vec::new();
-    let mut worklist_set = HashSet::new();
-    worklist.push(0);
-    worklist_set.insert(0);
-
-    while !worklist.is_empty() {
-        let block = worklist.pop().unwrap();
-        worklist_set.remove(&block);
-
-        let mut state = block_starts.get(&block).unwrap().clone();
-        trace!("at block {} -> state: {:?}", block, state);
-        // Iterate for each instruction in the block (we break at the first
-        // terminator we see).
-        let mut index = layout_info.label_inst_indices[block as usize];
-        while index < insts.len() as u32 {
-            state.step(&insts[index as usize]);
-            trace!(" -> inst #{}: {:?}", index, insts[index as usize]);
-            trace!("    --> state: {:?}", state);
-
-            let term = insts[index as usize].is_term();
-            if term.is_term() {
-                for succ in term.get_succs() {
-                    trace!("    SUCCESSOR block {}", succ.get());
-                    if let Some(succ_state) = block_starts.get_mut(&succ.get()) {
-                        trace!("     orig state: {:?}", succ_state);
-                        if succ_state.intersect_from(&state).changed {
-                            if worklist_set.insert(succ.get()) {
-                                worklist.push(succ.get());
-                            }
-                            trace!("     (changed)");
-                        }
-                        trace!("     new state: {:?}", succ_state);
-                    } else {
-                        // First time seeing this block
-                        block_starts.insert(succ.get(), state.clone());
-                        worklist.push(succ.get());
-                        worklist_set.insert(succ.get());
-                    }
-                }
-                break;
-            }
-
-            index += 1;
-        }
-    }
-
-    // Now iterate over blocks one last time, collecting
-    // value-label locations.
-
-    let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
-    for block in 0..layout_info.label_inst_indices.len() {
-        let start_index = layout_info.label_inst_indices[block];
-        let end_index = if block == layout_info.label_inst_indices.len() - 1 {
-            insts.len() as u32
-        } else {
-            layout_info.label_inst_indices[block + 1]
-        };
-        let block = block as u32;
-        let mut state = block_starts.get(&block).unwrap().clone();
-        for index in start_index..end_index {
-            let offset = inst_start(index as usize);
-            let end = layout_info.inst_end_offsets[index as usize];
-
-            // Cold blocks cause instructions to occur out-of-order wrt
-            // others. We rely on the monotonic mapping from instruction
-            // index to offset in machine code for this analysis to work,
-            // so we just skip debuginfo for cold blocks. This should be
-            // generally fine, as cold blocks mostly constitute slowpaths
-            // for expansions of particular ops, rather than user-written
-            // code.
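// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// dataflow pattern that `compute()` above implements. Block-entry state
// is the meet (intersection) of all predecessor-exit states -- the role
// `IntersectFrom` plays above -- and blocks are revisited until nothing
// changes. All names here are hypothetical and simplified; block 0 is
// assumed to be the entry.

use std::collections::HashSet;

type State = HashSet<u32>;

/// Meet (set intersection) in place; returns whether `lhs` changed.
fn meet(lhs: &mut State, rhs: &State) -> bool {
    let before = lhs.len();
    lhs.retain(|v| rhs.contains(v));
    lhs.len() != before
}

/// Worklist fixpoint over a CFG given as per-block successor lists.
/// A real analysis would also apply a per-instruction transfer function
/// before propagating to successors; this sketch propagates unchanged.
fn fixpoint(succs: &[Vec<usize>], entry: State) -> Vec<Option<State>> {
    let mut states: Vec<Option<State>> = vec![None; succs.len()];
    states[0] = Some(entry);
    let mut worklist = vec![0];
    while let Some(b) = worklist.pop() {
        let out = states[b].clone().unwrap();
        for &s in &succs[b] {
            match &mut states[s] {
                Some(st) => {
                    if meet(st, &out) {
                        worklist.push(s); // state shrank: revisit
                    }
                }
                slot => {
                    *slot = Some(out.clone()); // first visit
                    worklist.push(s);
                }
            }
        }
    }
    states
}
// ---------------------------------------------------------------------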
- if layout_info.start_of_cold_code.is_some() - && offset >= layout_info.start_of_cold_code.unwrap() - { - continue; - } - - assert!(offset <= end); - state.step(&insts[index as usize]); - - for (label, locs) in &state.label_to_locs { - trace!(" inst {} has label {:?} -> locs {:?}", index, label, locs); - // Find an appropriate loc: a register if possible, otherwise pick the first stack - // loc. - let reg = locs.iter().cloned().find(|l| l.is_reg()); - let loc = reg.or_else(|| locs.iter().cloned().find(|l| l.is_stack())); - if let Some(loc) = loc { - let loc = LabelValueLoc::from(loc); - let list = value_labels_ranges.entry(*label).or_insert_with(|| vec![]); - // If the existing location list for this value-label is - // either empty, or has an end location that does not extend - // to the current offset, then we have to append a new - // entry. Otherwise, we can extend the current entry. - // - // Note that `end` is one past the end of the instruction; - // it appears that `end` is exclusive, so a mapping valid at - // offset 5 will have start = 5, end = 6. - if list - .last() - .map(|last| last.end <= offset || last.loc != loc) - .unwrap_or(true) - { - list.push(ValueLocRange { - loc, - start: end, - end: end + 1, - }); - } else { - list.last_mut().unwrap().end = end + 1; - } - } - } - } - } - - trace!("ret: {:?}", value_labels_ranges); - value_labels_ranges -} diff --git a/cranelift/codegen/src/machinst/helpers.rs b/cranelift/codegen/src/machinst/helpers.rs index 40139d61ee..30446b04ab 100644 --- a/cranelift/codegen/src/machinst/helpers.rs +++ b/cranelift/codegen/src/machinst/helpers.rs @@ -1,8 +1,8 @@ //! Miscellaneous helpers for machine backends. use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs}; +use super::{Reg, Writable}; use crate::ir::Type; -use regalloc::{Reg, Writable}; use std::ops::{Add, BitAnd, Not, Sub}; /// Returns the size (in bits) of a given type. diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 052ed86e49..f6f8657a13 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -1,8 +1,7 @@ use crate::ir::{types, Inst, Value, ValueList}; -use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer}; +use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, Reg, Writable}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc::{Reg, Writable}; use smallvec::SmallVec; use std::cell::Cell; @@ -107,7 +106,8 @@ macro_rules! isle_prelude_methods { #[inline] fn invalid_reg(&mut self) -> Reg { - Reg::invalid() + use crate::machinst::valueregs::InvalidSentinel; + Reg::invalid_sentinel() } #[inline] @@ -467,7 +467,6 @@ where pub lower_ctx: &'a mut C, pub flags: &'a F, pub isa_flags: &'a I, - pub emitted_insts: SmallVec<[(C::I, bool); N]>, } /// Shared lowering code amongst all backends for doing ISLE-based lowering. @@ -482,7 +481,6 @@ pub(crate) fn lower_common( outputs: &[InsnOutput], inst: Inst, isle_lower: IF, - map_regs: fn(&mut C::I, &RegRenamer), ) -> Result<(), ()> where C: LowerCtx, @@ -495,7 +493,6 @@ where lower_ctx, flags, isa_flags, - emitted_insts: SmallVec::new(), }; let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?; @@ -514,10 +511,15 @@ where } // The ISLE generated code emits its own registers to define the - // instruction's lowered values in. We rename those registers to the - // registers they were assigned when their value was used as an operand in - // earlier lowerings. 
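// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// range-coalescing rule used when collecting value-label locations
// above. Ranges are half-open [start, end): a location valid at code
// offset 5 is stored as start = 5, end = 6. A new observation either
// extends the previous range (same location, contiguous) or opens a
// fresh one. Types here are hypothetical simplifications.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct LocRange {
    loc: u32,
    start: u32,
    end: u32,
}

fn record(list: &mut Vec<LocRange>, loc: u32, offset: u32, end: u32) {
    match list.last_mut() {
        // Same location and the previous range reaches this offset:
        // just extend it.
        Some(last) if last.loc == loc && last.end > offset => last.end = end,
        // Otherwise start a new range.
        _ => list.push(LocRange { loc, start: offset, end }),
    }
}
// ---------------------------------------------------------------------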
-    let mut renamer = RegRenamer::default();
+    // instruction's lowered values in. However, other instructions
+    // that use this SSA value will be lowered assuming that the value
+    // is generated into a pre-assigned, different, register.
+    //
+    // To connect the two, we set up "aliases" in the VCodeBuilder
+    // that apply when it is building the Operand table for the
+    // regalloc to use. These aliases effectively rewrite any use of
+    // the pre-assigned register to the register that was returned by
+    // the ISLE lowering logic.
     for i in 0..outputs.len() {
         let regs = temp_regs[i];
         let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]);
@@ -528,41 +530,11 @@ where
             // Flags values do not occupy any registers.
             assert!(regs.len() == 0);
         } else {
-            let (_, tys) = <C::I as MachInst>::rc_for_type(ty).unwrap();
-            assert!(regs.len() == tys.len());
-            assert!(regs.len() == dsts.len());
-            for ((dst, temp), ty) in dsts.regs().iter().zip(regs.regs().iter()).zip(tys) {
-                renamer.add_rename(*temp, dst.to_reg(), *ty);
+            for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
+                isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp);
             }
         }
     }
-    for (inst, _) in isle_ctx.emitted_insts.iter_mut() {
-        map_regs(inst, &renamer);
-    }
-
-    // If any renamed register wasn't actually defined in the ISLE-generated
-    // instructions then what we're actually doing is "renaming" an input to a
-    // new name which requires manually inserting a `mov` instruction. Note that
-    // this typically doesn't happen and is only here for cases where the input
-    // is sometimes passed through unmodified to the output, such as
-    // zero-extending a 64-bit input to a 128-bit output which doesn't actually
-    // change the input and simply produces another zero'd register.
-    for (old, new, ty) in renamer.unmapped_defs() {
-        isle_ctx
-            .lower_ctx
-            .emit(<C::I as MachInst>::gen_move(Writable::from_reg(new), old, ty));
-    }
-
-    // Once everything is remapped we forward all emitted instructions to the
-    // `lower_ctx`. Note that this happens after the synthetic mov's above in
-    // case any of these instructions use those movs.
-    for (inst, is_safepoint) in isle_ctx.emitted_insts {
-        if is_safepoint {
-            lower_ctx.emit_safepoint(inst);
-        } else {
-            lower_ctx.emit(inst);
-        }
-    }
     Ok(())
 }
diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs
index 314a294d2e..6cd7bfa5e8 100644
--- a/cranelift/codegen/src/machinst/lower.rs
+++ b/cranelift/codegen/src/machinst/lower.rs
@@ -18,17 +18,19 @@ use crate::ir::{
 };
 use crate::machinst::{
     non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
-    LoweredBlock, MachLabel, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
-    VCodeInst, ValueRegs,
+    LoweredBlock, MachLabel, Reg, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData,
+    VCodeConstants, VCodeInst, ValueRegs, Writable,
 };
 use crate::CodegenResult;
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use core::convert::TryInto;
-use regalloc::{Reg, StackmapRequestInfo, Writable};
+use regalloc2::VReg;
 use smallvec::{smallvec, SmallVec};
 use std::fmt::Debug;
 
+use super::{first_user_vreg_index, VCodeBuildDirection};
+
 /// An "instruction color" partitions CLIF instructions by side-effecting ops.
 /// All instructions with the same "color" are guaranteed not to be separated by
 /// any side-effecting op (for this purpose, loads are also considered
@@ -160,8 +162,6 @@ pub trait LowerCtx {
     fn alloc_tmp(&mut self, ty: Type) -> ValueRegs<Writable<Reg>>;
     /// Emit a machine instruction.
     fn emit(&mut self, mach_inst: Self::I);
-    /// Emit a machine instruction that is a safepoint.
-    fn emit_safepoint(&mut self, mach_inst: Self::I);
     /// Indicate that the side-effect of an instruction has been sunk to the
     /// current scan location. This should only be done when the instruction's
     /// original results are not used (i.e., `put_input_in_regs` is not invoked
@@ -178,6 +178,9 @@ pub trait LowerCtx {
     /// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
     /// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
     fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
+
+    /// Note that one vreg is to be treated as an alias of another.
+    fn set_vreg_alias(&mut self, from: Reg, to: Reg);
 }
 
 /// A representation of all of the ways in which a value is available, aside
@@ -232,14 +235,6 @@ pub trait LowerBackend {
     }
 }
 
-/// A pending instruction to insert and auxiliary information about it: its source location and
-/// whether it is a safepoint.
-struct InstTuple<I: VCodeInst> {
-    loc: SourceLoc,
-    is_safepoint: bool,
-    inst: I,
-}
-
 /// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
 /// from original Inst to MachInsts.
 pub struct Lower<'func, I: VCodeInst> {
@@ -287,20 +282,10 @@ pub struct Lower<'func, I: VCodeInst> {
     inst_sunk: FxHashSet<Inst>,
 
     /// Next virtual register number to allocate.
-    next_vreg: u32,
-
-    /// Insts in reverse block order, before final copy to vcode.
-    block_insts: Vec<InstTuple<I>>,
-
-    /// Ranges in `block_insts` constituting BBs.
-    block_ranges: Vec<(usize, usize)>,
-
-    /// Instructions collected for the BB in progress, in reverse order, with
-    /// source-locs attached.
-    bb_insts: Vec<InstTuple<I>>,
+    next_vreg: usize,
 
     /// Instructions collected for the CLIF inst in progress, in forward order.
-    ir_insts: Vec<InstTuple<I>>,
+    ir_insts: Vec<I>,
 
     /// The register to use for GetPinnedReg, if any, on this architecture.
     pinned_reg: Option<Reg>,
@@ -324,22 +309,22 @@ pub enum RelocDistance {
 
 fn alloc_vregs<I: VCodeInst>(
     ty: Type,
-    next_vreg: &mut u32,
+    next_vreg: &mut usize,
     vcode: &mut VCodeBuilder<I>,
 ) -> CodegenResult<ValueRegs<Reg>> {
     let v = *next_vreg;
     let (regclasses, tys) = I::rc_for_type(ty)?;
-    *next_vreg += regclasses.len() as u32;
-    let regs = match regclasses {
-        &[rc0] => ValueRegs::one(Reg::new_virtual(rc0, v)),
-        &[rc0, rc1] => ValueRegs::two(Reg::new_virtual(rc0, v), Reg::new_virtual(rc1, v + 1)),
+    *next_vreg += regclasses.len();
+    let regs: ValueRegs<Reg> = match regclasses {
+        &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
+        &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
         // We can extend this if/when we support 32-bit targets; e.g.,
         // an i128 on a 32-bit machine will need up to four machine regs
         // for a `Value`.
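// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// idea behind the new `set_vreg_alias` above. Rather than editing
// already-emitted instructions, the builder records `from -> to`
// entries and chases the chain when operands are finally collected for
// the register allocator. Hypothetical, simplified (vregs as plain u32
// indices).

use std::collections::HashMap;

#[derive(Default)]
struct VRegAliases(HashMap<u32, u32>);

impl VRegAliases {
    fn set(&mut self, from: u32, to: u32) {
        debug_assert_ne!(from, to, "a self-alias would loop forever");
        self.0.insert(from, to);
    }

    /// Follow alias chains until a non-aliased vreg is reached.
    fn resolve(&self, mut v: u32) -> u32 {
        while let Some(&next) = self.0.get(&v) {
            v = next;
        }
        v
    }
}
// ---------------------------------------------------------------------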
         _ => panic!("Value must reside in 1 or 2 registers"),
     };
     for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
-        vcode.set_vreg_type(reg.to_virtual_reg(), reg_ty);
+        vcode.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
     }
     Ok(regs)
 }
@@ -358,9 +343,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         block_order: BlockLoweringOrder,
     ) -> CodegenResult<Lower<'func, I>> {
         let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
-        let mut vcode = VCodeBuilder::new(abi, emit_info, block_order, constants);
+        let mut vcode = VCodeBuilder::new(
+            abi,
+            emit_info,
+            block_order,
+            constants,
+            VCodeBuildDirection::Backward,
+        );
 
-        let mut next_vreg: u32 = 0;
+        let mut next_vreg: usize = first_user_vreg_index();
 
         let mut value_regs = SecondaryMap::with_default(ValueRegs::invalid());
 
@@ -381,10 +372,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                 let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?;
                 value_regs[result] = regs;
                 log::trace!(
-                    "bb {} inst {} ({:?}): result regs {:?}",
+                    "bb {} inst {} ({:?}): result {} regs {:?}",
                     bb,
                     inst,
                     f.dfg[inst],
+                    result,
                     regs,
                 );
             }
@@ -459,9 +451,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             inst_sunk: FxHashSet::default(),
             cur_scan_entry_color: None,
             cur_inst: None,
-            block_insts: vec![],
-            block_ranges: vec![],
-            bb_insts: vec![],
             ir_insts: vec![],
             pinned_reg: None,
             vm_context,
@@ -475,6 +464,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             entry_bb,
             self.f.dfg.block_params(entry_bb)
         );
+
+        // Make the vmctx available in debuginfo.
+        if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
+            self.emit_value_label_marks_for_value(vmctx_val);
+        }
+
         for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
             if !self.vcode.abi().arg_is_needed_in_body(i) {
                 continue;
@@ -509,14 +504,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
-        // Hack: to keep `vmctx` alive, if it exists, we emit a value label here
-        // for it if debug info is requested. This ensures that it exists either
-        // in a register or spillslot throughout the entire function body, and
-        // allows for a better debugging experience.
-        if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
-            self.emit_value_label_marks_for_value(vmctx_val);
-        }
-
         let retval_regs = self.retval_regs.clone();
         for (i, regs) in retval_regs.into_iter().enumerate() {
             let regs = writable_value_regs(regs);
@@ -534,141 +521,16 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
         };
         self.emit(inst);
-    }
-    fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> {
-        log::trace!("lower_edge: pred {} succ {}", pred, succ);
-
-        let num_args = self.f.dfg.block_params(succ).len();
-        debug_assert!(num_args == self.f.dfg.inst_variable_args(inst).len());
-
-        // Most blocks have no params, so skip all the hoop-jumping below and make an early exit.
-        if num_args == 0 {
-            return Ok(());
-        }
-
-        self.cur_inst = Some(inst);
-
-        // Make up two vectors of info:
-        //
-        // * one for dsts which are to be assigned constants. We'll deal with those second, so
-        //   as to minimise live ranges.
-        //
-        // * one for dsts whose sources are non-constants.
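// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): how
// `alloc_vregs` above maps one IR value onto consecutive vreg indices,
// starting after the pinned (physical) range that
// `first_user_vreg_index()` returns. Hypothetical, simplified.

fn alloc(next_vreg: &mut usize, num_parts: usize) -> Vec<usize> {
    let v = *next_vreg;
    *next_vreg += num_parts;
    (v..v + num_parts).collect()
}

fn main() {
    let mut next = 128; // assume 128 pinned vregs precede user vregs
    assert_eq!(alloc(&mut next, 1), vec![128]); // e.g. an i64: one reg
    assert_eq!(alloc(&mut next, 2), vec![129, 130]); // an i128: two regs
}
// ---------------------------------------------------------------------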
-
-        let mut const_bundles: SmallVec<[_; 16]> = SmallVec::new();
-        let mut var_bundles: SmallVec<[_; 16]> = SmallVec::new();
-
-        let mut i = 0;
-        for (dst_val, src_val) in self
-            .f
-            .dfg
-            .block_params(succ)
-            .iter()
-            .zip(self.f.dfg.inst_variable_args(inst).iter())
-        {
-            let src_val = self.f.dfg.resolve_aliases(*src_val);
-            let ty = self.f.dfg.value_type(src_val);
-
-            debug_assert!(ty == self.f.dfg.value_type(*dst_val));
-            let dst_regs = self.value_regs[*dst_val];
-
-            let input = self.get_value_as_source_or_const(src_val);
-            log::trace!("jump arg {} is {}", i, src_val);
-            i += 1;
-
-            if let Some(c) = input.constant {
-                log::trace!(" -> constant {}", c);
-                const_bundles.push((ty, writable_value_regs(dst_regs), c));
-            } else {
-                let src_regs = self.put_value_in_regs(src_val);
-                log::trace!(" -> reg {:?}", src_regs);
-                // Skip self-assignments. Not only are they pointless, they falsely trigger the
-                // overlap-check below and hence can cause a lot of unnecessary copying through
-                // temporaries.
-                if dst_regs != src_regs {
-                    var_bundles.push((ty, writable_value_regs(dst_regs), src_regs));
-                }
+        // Hack: generate a virtual instruction that uses vmctx in
+        // order to keep it alive for the duration of the function,
+        // for the benefit of debuginfo.
+        if self.f.dfg.values_labels.is_some() {
+            if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
+                let vmctx_reg = self.value_regs[vmctx_val].only_reg().unwrap();
+                self.emit(I::gen_dummy_use(vmctx_reg));
             }
         }
-
-        // Deal first with the moves whose sources are variables.
-
-        // FIXME: use regalloc.rs' SparseSetU here. This would avoid all heap allocation
-        // for cases of up to circa 16 args. Currently not possible because regalloc.rs
-        // does not export it.
-        let mut src_reg_set = FxHashSet::<Reg>::default();
-        for (_, _, src_regs) in &var_bundles {
-            for &reg in src_regs.regs() {
-                src_reg_set.insert(reg);
-            }
-        }
-        let mut overlaps = false;
-        'outer: for (_, dst_regs, _) in &var_bundles {
-            for &reg in dst_regs.regs() {
-                if src_reg_set.contains(&reg.to_reg()) {
-                    overlaps = true;
-                    break 'outer;
-                }
-            }
-        }
-
-        // If, as is mostly the case, the source and destination register sets are non
-        // overlapping, then we can copy directly, so as to save the register allocator work.
-        if !overlaps {
-            for (ty, dst_regs, src_regs) in &var_bundles {
-                let (_, reg_tys) = I::rc_for_type(*ty)?;
-                for ((dst, src), reg_ty) in dst_regs
-                    .regs()
-                    .iter()
-                    .zip(src_regs.regs().iter())
-                    .zip(reg_tys.iter())
-                {
-                    self.emit(I::gen_move(*dst, *src, *reg_ty));
-                }
-            }
-        } else {
-            // There's some overlap, so play safe and copy via temps.
-            let mut tmp_regs = SmallVec::<[ValueRegs<Writable<Reg>>; 16]>::new();
-            for (ty, _, _) in &var_bundles {
-                tmp_regs.push(self.alloc_tmp(*ty));
-            }
-            for ((ty, _, src_reg), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
-                let (_, reg_tys) = I::rc_for_type(*ty)?;
-                for ((tmp, src), reg_ty) in tmp_reg
-                    .regs()
-                    .iter()
-                    .zip(src_reg.regs().iter())
-                    .zip(reg_tys.iter())
-                {
-                    self.emit(I::gen_move(*tmp, *src, *reg_ty));
-                }
-            }
-            for ((ty, dst_reg, _), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
-                let (_, reg_tys) = I::rc_for_type(*ty)?;
-                for ((dst, tmp), reg_ty) in dst_reg
-                    .regs()
-                    .iter()
-                    .zip(tmp_reg.regs().iter())
-                    .zip(reg_tys.iter())
-                {
-                    self.emit(I::gen_move(*dst, tmp.to_reg(), *reg_ty));
-                }
-            }
-        }
-
-        // Now, finally, deal with the moves whose sources are constants.
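// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// overlap hazard the deleted `lower_edge` above guarded against. A
// parallel copy such as {a <- b, b <- a} cannot be done with in-place
// moves; whenever a destination is also a source, reading everything
// into temporaries first is always safe. Hypothetical, simplified.

use std::collections::HashSet;

/// Perform `regs[dsts[i]] = regs[srcs[i]]` for all i, as if in parallel.
fn parallel_move(regs: &mut [u64], dsts: &[usize], srcs: &[usize]) {
    let src_set: HashSet<usize> = srcs.iter().copied().collect();
    if dsts.iter().all(|d| !src_set.contains(d)) {
        // Disjoint sets: direct copies avoid the temporaries.
        for (&d, &s) in dsts.iter().zip(srcs) {
            regs[d] = regs[s];
        }
    } else {
        // Overlap: snapshot all sources first, then write.
        let tmps: Vec<u64> = srcs.iter().map(|&s| regs[s]).collect();
        for (&d, t) in dsts.iter().zip(tmps) {
            regs[d] = t;
        }
    }
}
// ---------------------------------------------------------------------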
- for (ty, dst_reg, const_val) in &const_bundles { - for inst in I::gen_constant(*dst_reg, *const_val as u128, *ty, |ty| { - self.alloc_tmp(ty).only_reg().unwrap() - }) - .into_iter() - { - self.emit(inst); - } - } - - Ok(()) } /// Has this instruction been sunk to a use-site (i.e., away from its @@ -694,21 +556,24 @@ impl<'func, I: VCodeInst> Lower<'func, I> { self.cur_scan_entry_color = Some(self.block_end_colors[block]); // Lowering loop: // - For each non-branch instruction, in reverse order: - // - If side-effecting (load, store, branch/call/return, possible trap), or if - // used outside of this block, or if demanded by another inst, then lower. + // - If side-effecting (load, store, branch/call/return, + // possible trap), or if used outside of this block, or if + // demanded by another inst, then lower. // - // That's it! Lowering of side-effecting ops will force all *needed* - // (live) non-side-effecting ops to be lowered at the right places, via - // the `use_input_reg()` callback on the `LowerCtx` (that's us). That's - // because `use_input_reg()` sets the eager/demand bit for any insts - // whose result registers are used. + // That's it! Lowering of side-effecting ops will force all + // *needed* (live) non-side-effecting ops to be lowered at the + // right places, via the `use_input_reg()` callback on the + // `LowerCtx` (that's us). That's because `use_input_reg()` + // sets the eager/demand bit for any insts whose result + // registers are used. // - // We build up the BB in reverse instruction order in `bb_insts`. - // Because the machine backend calls `ctx.emit()` in forward order, we - // collect per-IR-inst lowered instructions in `ir_insts`, then reverse - // these and append to `bb_insts` as we go backward through the block. - // `bb_insts` are then reversed again and appended to the VCode at the - // end of the BB (in the toplevel driver `lower()`). + // We set the VCodeBuilder to "backward" mode, so we emit + // blocks in reverse order wrt the BlockIndex sequence, and + // emit instructions in reverse order within blocks. Because + // the machine backend calls `ctx.emit()` in forward order, we + // collect per-IR-inst lowered instructions in `ir_insts`, + // then reverse these and append to the VCode at the end of + // each IR instruction. for inst in self.f.layout.block_insts(block).rev() { let data = &self.f.dfg[inst]; let has_side_effect = has_lowering_side_effect(self.f, inst); @@ -750,9 +615,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> { if has_side_effect || value_needed { log::trace!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]); backend.lower(self, inst)?; - // Emit value-label markers if needed, to later recover debug - // mappings. - self.emit_value_label_markers_for_inst(inst); } if data.opcode().is_return() { // Return: handle specially, using ABI-appropriate sequence. @@ -767,11 +629,33 @@ impl<'func, I: VCodeInst> Lower<'func, I> { let loc = self.srcloc(inst); self.finish_ir_inst(loc); + + // Emit value-label markers if needed, to later recover + // debug mappings. This must happen before the instruction + // (so after we emit, in bottom-to-top pass). + self.emit_value_label_markers_for_inst(inst); } + + // Add the block params to this block. 
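// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// "emit forward, store backward" scheme described in the comment above.
// The backend pushes each IR instruction's machine instructions in
// forward order into a small staging buffer; the buffer is drained in
// reverse into the backward-built VCode, which is reversed once at the
// end. Hypothetical, simplified.

#[derive(Default)]
struct BackwardBuf {
    ir_insts: Vec<&'static str>, // per-IR-inst staging, forward order
    vcode: Vec<&'static str>,    // whole function, reversed
}

impl BackwardBuf {
    fn emit(&mut self, inst: &'static str) {
        self.ir_insts.push(inst);
    }
    fn finish_ir_inst(&mut self) {
        for inst in self.ir_insts.drain(..).rev() {
            self.vcode.push(inst);
        }
    }
    fn build(mut self) -> Vec<&'static str> {
        self.vcode.reverse();
        self.vcode
    }
}

fn main() {
    let mut b = BackwardBuf::default();
    // IR insts are visited bottom-to-top; within each, we emit forward.
    b.emit("mov t0, #2");
    b.emit("add r0, t0");
    b.finish_ir_inst(); // last IR inst, lowered first
    b.emit("ld t0, [sp]");
    b.finish_ir_inst(); // first IR inst, lowered last
    assert_eq!(b.build(), vec!["ld t0, [sp]", "mov t0, #2", "add r0, t0"]);
}
// ---------------------------------------------------------------------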
+        self.add_block_params(block)?;
+
         self.cur_scan_entry_color = None;
         Ok(())
     }
 
+    fn add_block_params(&mut self, block: Block) -> CodegenResult<()> {
+        for &param in self.f.dfg.block_params(block) {
+            let ty = self.f.dfg.value_type(param);
+            let (_reg_rcs, reg_tys) = I::rc_for_type(ty)?;
+            debug_assert_eq!(reg_tys.len(), self.value_regs[param].len());
+            for (&reg, &rty) in self.value_regs[param].regs().iter().zip(reg_tys.iter()) {
+                self.vcode
+                    .add_block_param(reg.to_virtual_reg().unwrap(), rty);
+            }
+        }
+        Ok(())
+    }
+
     fn get_value_labels<'a>(&'a self, val: Value, depth: usize) -> Option<&'a [ValueLabelStart]> {
         if let Some(ref values_labels) = self.f.dfg.values_labels {
             log::trace!(
@@ -794,7 +678,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     fn emit_value_label_marks_for_value(&mut self, val: Value) {
-        let mut markers: SmallVec<[I; 4]> = smallvec![];
         let regs = self.value_regs[val];
         if regs.len() > 1 {
             return;
@@ -813,12 +696,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                 reg,
                 label,
             );
-            markers.push(I::gen_value_label_marker(label, reg));
+            self.vcode.add_value_label(reg, label);
             }
         }
-        for marker in markers {
-            self.emit(marker);
-        }
     }
 
     fn emit_value_label_markers_for_inst(&mut self, inst: Inst) {
@@ -849,36 +729,17 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     fn finish_ir_inst(&mut self, loc: SourceLoc) {
-        // `bb_insts` is kept in reverse order, so emit the instructions in
-        // reverse order.
-        for mut tuple in self.ir_insts.drain(..).rev() {
-            tuple.loc = loc;
-            self.bb_insts.push(tuple);
+        self.vcode.set_srcloc(loc);
+        // The VCodeBuilder builds in reverse order (and reverses at
+        // the end), but `ir_insts` is in forward order, so reverse
+        // it.
+        for inst in self.ir_insts.drain(..).rev() {
+            self.vcode.push(inst);
         }
     }
 
     fn finish_bb(&mut self) {
-        let start = self.block_insts.len();
-        for tuple in self.bb_insts.drain(..).rev() {
-            self.block_insts.push(tuple);
-        }
-        let end = self.block_insts.len();
-        self.block_ranges.push((start, end));
-    }
-
-    fn copy_bbs_to_vcode(&mut self) {
-        for &(start, end) in self.block_ranges.iter().rev() {
-            for &InstTuple {
-                loc,
-                is_safepoint,
-                ref inst,
-            } in &self.block_insts[start..end]
-            {
-                self.vcode.set_srcloc(loc);
-                self.vcode.push(inst.clone(), is_safepoint);
-            }
-            self.vcode.end_bb();
-        }
+        self.vcode.end_bb();
     }
 
     fn lower_clif_branches<B: LowerBackend<MInst = I>>(
@@ -900,9 +761,28 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         backend.lower_branch_group(self, branches, targets)?;
         let loc = self.srcloc(branches[0]);
         self.finish_ir_inst(loc);
+        // Add block param outputs for current block.
+        self.lower_branch_blockparam_args(block);
         Ok(())
     }
 
+    fn lower_branch_blockparam_args(&mut self, block: Block) {
+        visit_block_succs(self.f, block, |inst, _succ| {
+            let branch_args = self.f.dfg.inst_variable_args(inst);
+            let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
+            for &arg in branch_args {
+                let arg = self.f.dfg.resolve_aliases(arg);
+                let regs = self.put_value_in_regs(arg);
+                for &vreg in regs.regs() {
+                    let vreg = self.vcode.resolve_vreg_alias(vreg.into());
+                    branch_arg_vregs.push(vreg.into());
                }
+            }
+            self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
+        });
+        self.finish_ir_inst(SourceLoc::default());
+    }
+
     fn collect_branches_and_targets(
         &self,
         bindex: BlockIndex,
@@ -927,10 +807,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     /// Lower the function.
-    pub fn lower<B: LowerBackend<MInst = I>>(
-        mut self,
-        backend: &B,
-    ) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
+    pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
         log::trace!("about to lower function: {:?}", self.f);
 
         // Initialize the ABI object, giving it a temp if requested.
@@ -945,7 +822,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         // not the whole `Lower` impl).
         self.pinned_reg = backend.maybe_pinned_reg();
 
-        self.vcode.set_entry(0);
+        self.vcode.set_entry(BlockIndex::new(0));
 
         // Reused vectors for branch lowering.
         let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
@@ -963,7 +840,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
 
         // Main lowering loop over lowered blocks.
         for (bindex, lb) in lowered_order.iter().enumerate().rev() {
-            let bindex = bindex as BlockIndex;
+            let bindex = BlockIndex::new(bindex);
 
             // Lower the block body in reverse order (see comment in
             // `lower_clif_block()` for rationale).
@@ -976,30 +853,41 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                     self.finish_ir_inst(self.srcloc(branches[0]));
                 }
             } else {
-                // If no orig block, this must be a pure edge block; get the successor and
-                // emit a jump.
+                // If no orig block, this must be a pure edge block;
+                // get the successor and emit a jump. Add block params
+                // according to the one successor, and pass them
+                // through; note that the successor must have an
+                // original block.
                 let (_, succ) = self.vcode.block_order().succ_indices(bindex)[0];
+
+                let orig_succ = lowered_order[succ.index()];
+                let orig_succ = orig_succ
+                    .orig_block()
+                    .expect("Edge block succ must be body block");
+
+                let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
+                for ty in self.f.dfg.block_param_types(orig_succ) {
+                    let regs = alloc_vregs(ty, &mut self.next_vreg, &mut self.vcode)?;
+                    for &reg in regs.regs() {
+                        branch_arg_vregs.push(reg);
+                        let vreg = reg.to_virtual_reg().unwrap();
+                        self.vcode
+                            .add_block_param(vreg, self.vcode.get_vreg_type(vreg));
+                    }
+                }
+                self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
+
+                self.emit(I::gen_jump(MachLabel::from_block(succ)));
                 self.finish_ir_inst(SourceLoc::default());
             }
-            // Out-edge phi moves.
-            if let Some((pred, inst, succ)) = lb.out_edge() {
-                self.lower_edge(pred, inst, succ)?;
-                self.finish_ir_inst(SourceLoc::default());
-            }
             // Original block body.
             if let Some(bb) = lb.orig_block() {
                 self.lower_clif_block(backend, bb)?;
                 self.emit_value_label_markers_for_block_args(bb);
             }
-            // In-edge phi moves.
-            if let Some((pred, inst, succ)) = lb.in_edge() {
-                self.lower_edge(pred, inst, succ)?;
-                self.finish_ir_inst(SourceLoc::default());
-            }
-            if bindex == 0 {
+            if bindex.index() == 0 {
                 // Set up the function with arg vreg inits.
                 self.gen_arg_setup();
                 self.finish_ir_inst(SourceLoc::default());
@@ -1008,13 +896,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             self.finish_bb();
         }
 
-        self.copy_bbs_to_vcode();
-
-        // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
-        let (vcode, stack_map_info) = self.vcode.build();
+        // Now that we've emitted all instructions into the
+        // VCodeBuilder, let's build the VCode.
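// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): why a
// pure edge block above can simply mint fresh vregs and pass them
// through. With blockparam-based SSA (instead of explicit phi moves),
// an edge block A->B receives as many values as B has parameters and
// forwards the same vregs as branch arguments, leaving all copy
// insertion to regalloc2. Hypothetical, simplified.

struct EdgeBlock {
    params: Vec<u32>,      // vregs this edge block receives
    branch_args: Vec<u32>, // vregs it passes on to the successor
}

fn make_edge_block(next_vreg: &mut u32, succ_param_count: usize) -> EdgeBlock {
    let params: Vec<u32> = (0..succ_param_count)
        .map(|_| {
            let v = *next_vreg;
            *next_vreg += 1;
            v
        })
        .collect();
    // Pass through unchanged: the edge block itself does no work.
    EdgeBlock {
        branch_args: params.clone(),
        params,
    }
}
// ---------------------------------------------------------------------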
+ let vcode = self.vcode.build(); log::trace!("built vcode: {:?}", vcode); - Ok((vcode, stack_map_info)) + Ok(vcode) } } @@ -1278,19 +1165,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { } fn emit(&mut self, mach_inst: I) { - self.ir_insts.push(InstTuple { - loc: SourceLoc::default(), - is_safepoint: false, - inst: mach_inst, - }); - } - - fn emit_safepoint(&mut self, mach_inst: I) { - self.ir_insts.push(InstTuple { - loc: SourceLoc::default(), - is_safepoint: true, - inst: mach_inst, - }); + log::trace!("emit: {:?}", mach_inst); + self.ir_insts.push(mach_inst); } fn sink_inst(&mut self, ir_inst: Inst) { @@ -1336,7 +1212,7 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { } fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg { - if reg.is_virtual() { + if reg.to_virtual_reg().is_some() { reg } else { let new_reg = self.alloc_tmp(ty).only_reg().unwrap(); @@ -1344,6 +1220,11 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { new_reg.to_reg() } } + + fn set_vreg_alias(&mut self, from: Reg, to: Reg) { + log::trace!("set vreg alias: from {:?} to {:?}", from, to); + self.vcode.set_vreg_alias(from, to); + } } /// Visit all successors of a block with a given visitor closure. diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index c2cdb4e32c..7d1581d4f8 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -8,14 +8,10 @@ //! //! The container for machine instructions, at various stages of construction, //! is the `VCode` struct. We refer to a sequence of machine instructions organized -//! into basic blocks as "vcode". This is short for "virtual-register code", though -//! it's a bit of a misnomer because near the end of the pipeline, vcode has all -//! real registers. Nevertheless, the name is catchy and we like it. +//! into basic blocks as "vcode". This is short for "virtual-register code". //! //! The compilation pipeline, from an `ir::Function` (already optimized as much as //! you like by machine-independent optimization passes) onward, is as follows. -//! (N.B.: though we show the VCode separately at each stage, the passes -//! mutate the VCode in place; these are not separate copies of the code.) //! //! ```plain //! @@ -31,37 +27,25 @@ //! | with unknown offsets. //! | - critical edges (actually all edges) //! | are split.) -//! | [regalloc] //! | -//! VCode (machine instructions: -//! | - all real registers. -//! | - new instruction sequence returned -//! | out-of-band in RegAllocResult. -//! | - instruction sequence has spills, -//! | reloads, and moves inserted. -//! | - other invariants same as above.) +//! | [regalloc --> `regalloc2::Output`; VCode is unchanged] //! | -//! | [preamble/postamble] +//! | [binary emission via MachBuffer] //! | -//! VCode (machine instructions: -//! | - stack-frame size known. -//! | - out-of-band instruction sequence -//! | has preamble prepended to entry -//! | block, and postamble injected before -//! | every return instruction. -//! | - all symbolic stack references to -//! | stackslots and spillslots are resolved -//! | to concrete FP-offset mem addresses.) -//! | -//! | [binary emission via MachBuffer -//! | with streaming branch resolution/simplification] -//! | -//! Vec (machine code!) +//! Vec (machine code: +//! | - two-dest branches resolved via +//! | streaming branch resolution/simplification. +//! | - regalloc `Allocation` results used directly +//! | by instruction emission code. +//! 
| - prologue and epilogue(s) built and emitted +//! | directly during emission. +//! | - nominal-SP-relative offsets resolved +//! | by tracking EmitState.) //! //! ``` use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap}; -use crate::ir::{SourceLoc, StackSlot, Type, ValueLabel}; +use crate::ir::{SourceLoc, StackSlot, Type}; use crate::result::CodegenResult; use crate::settings::Flags; use crate::value_label::ValueLabelsRanges; @@ -69,10 +53,7 @@ use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt::Debug; use cranelift_entity::PrimaryMap; -use regalloc::RegUsageCollector; -use regalloc::{ - RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable, -}; +use regalloc2::{Allocation, VReg}; use smallvec::{smallvec, SmallVec}; use std::string::String; @@ -98,20 +79,15 @@ pub use helpers::*; pub mod inst_common; pub use inst_common::*; pub mod valueregs; +pub use reg::*; pub use valueregs::*; -pub mod debug; -pub use regmapping::*; -pub mod regmapping; +pub mod reg; /// A machine instruction. pub trait MachInst: Clone + Debug { /// Return the registers referenced by this machine instruction along with /// the modes of reference (use, def, modify). - fn get_regs(&self, collector: &mut RegUsageCollector); - - /// Map virtual registers to physical registers using the given virt->phys - /// maps corresponding to the program points prior to, and after, this instruction. - fn map_regs(&mut self, maps: &RUM); + fn get_operands VReg>(&self, collector: &mut OperandCollector<'_, F>); /// If this is a simple move, return the (source, destination) tuple of registers. fn is_move(&self) -> Option<(Writable, Reg)>; @@ -128,11 +104,6 @@ pub trait MachInst: Clone + Debug { true } - /// If this is a load or store to the stack, return that info. - fn stack_op_info(&self) -> Option { - None - } - /// Generate a move. fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self; @@ -144,10 +115,9 @@ pub trait MachInst: Clone + Debug { alloc_tmp: F, ) -> SmallVec<[Self; 4]>; - /// Possibly operate on a value directly in a spill-slot rather than a - /// register. Useful if the machine has register-memory instruction forms - /// (e.g., add directly from or directly to memory), like x86. - fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option; + /// Generate a dummy instruction that will keep a value alive but + /// has no other purpose. + fn gen_dummy_use(reg: Reg) -> Self; /// Determine register class(es) to store the given Cranelift type, and the /// Cranelift type actually stored in the underlying register(s). May return @@ -163,6 +133,13 @@ pub trait MachInst: Clone + Debug { /// generating spills and reloads for individual registers. fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>; + /// Get an appropriate type that can fully hold a value in a given + /// register class. This may not be the only type that maps to + /// that class, but when used with `gen_move()` or the ABI trait's + /// load/spill constructors, it should produce instruction(s) that + /// move the entire register contents. + fn canonical_type_for_rc(rc: RegClass) -> Type; + /// Generate a jump to another target. Used during lowering of /// control flow. fn gen_jump(target: MachLabel) -> Self; @@ -187,16 +164,8 @@ pub trait MachInst: Clone + Debug { /// be dependent on compilation flags. fn ref_type_regclass(_flags: &Flags) -> RegClass; - /// Does this instruction define a ValueLabel? 
Returns the `Reg` whose value - /// becomes the new value of the `ValueLabel` after this instruction. - fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { - None - } - - /// Create a marker instruction that defines a value label. - fn gen_value_label_marker(_label: ValueLabel, _reg: Reg) -> Self { - Self::gen_nop(0) - } + /// Is this a safepoint? + fn is_safepoint(&self) -> bool; /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. @@ -266,35 +235,6 @@ pub enum MachTerminator<'a> { Indirect(&'a [MachLabel]), } -impl<'a> MachTerminator<'a> { - /// Get the successor labels named in a `MachTerminator`. - pub fn get_succs(&self) -> SmallVec<[MachLabel; 2]> { - let mut ret = smallvec![]; - match self { - &MachTerminator::Uncond(l) => { - ret.push(l); - } - &MachTerminator::Cond(l1, l2) => { - ret.push(l1); - ret.push(l2); - } - &MachTerminator::Indirect(ls) => { - ret.extend(ls.iter().cloned()); - } - _ => {} - } - ret - } - - /// Is this a terminator? - pub fn is_term(&self) -> bool { - match self { - MachTerminator::None => false, - _ => true, - } - } -} - /// A trait describing the ability to encode a MachInst into binary machine code. pub trait MachInstEmit: MachInst { /// Persistent state carried across `emit` invocations. @@ -302,9 +242,15 @@ pub trait MachInstEmit: MachInst { /// Constant information used in `emit` invocations. type Info; /// Emit the instruction. - fn emit(&self, code: &mut MachBuffer, info: &Self::Info, state: &mut Self::State); + fn emit( + &self, + allocs: &[Allocation], + code: &mut MachBuffer, + info: &Self::Info, + state: &mut Self::State, + ); /// Pretty-print the instruction. - fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String; + fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String; } /// A trait describing the emission state carried between MachInsts when @@ -409,15 +355,3 @@ pub enum UnwindInfoKind { #[cfg(feature = "unwind")] Windows, } - -/// Info about an operation that loads or stores from/to the stack. -#[derive(Clone, Copy, Debug)] -pub enum MachInstStackOpInfo { - /// Load from an offset from the nominal stack pointer into the given reg. - LoadNomSPOff(Reg, i64), - /// Store to an offset from the nominal stack pointer from the given reg. - StoreNomSPOff(Reg, i64), - /// Adjustment of nominal-SP up or down. This value is added to subsequent - /// offsets in loads/stores above to produce real-SP offsets. - NomSPAdj(i64), -} diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs new file mode 100644 index 0000000000..8b0b835aac --- /dev/null +++ b/cranelift/codegen/src/machinst/reg.rs @@ -0,0 +1,504 @@ +//! Definitions for registers, operands, etc. Provides a thin +//! interface over the register allocator so that we can more easily +//! swap it out or shim it when necessary. + +use crate::machinst::MachInst; +use alloc::{string::String, vec::Vec}; +use core::{fmt::Debug, hash::Hash}; +use regalloc2::{Allocation, Operand, PReg, VReg}; +use smallvec::{smallvec, SmallVec}; + +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +/// The first 128 vregs (64 int, 64 float/vec) are "pinned" to +/// physical registers: this means that they are always constrained to +/// the corresponding register at all use/mod/def sites. 
+///
+/// Arbitrary vregs can also be constrained to physical registers at
+/// particular use/def/mod sites, and this is preferable; but pinned
+/// vregs allow us to migrate code that has been written using
+/// RealRegs directly.
+const PINNED_VREGS: usize = 128;
+
+/// Convert a `VReg` to its pinned `PReg`, if any.
+pub fn pinned_vreg_to_preg(vreg: VReg) -> Option<PReg> {
+    if vreg.vreg() < PINNED_VREGS {
+        Some(PReg::from_index(vreg.vreg()))
+    } else {
+        None
+    }
+}
+
+/// Return the index of the first vreg available for generated code
+/// (i.e., after all pinned vregs).
+pub fn first_user_vreg_index() -> usize {
+    // This is just the constant defined above, but we keep the
+    // constant private and expose only this helper function with the
+    // specific name in order to ensure other parts of the code don't
+    // open-code and depend on the index-space scheme.
+    PINNED_VREGS
+}
+
+/// A register named in an instruction. This register can be either a
+/// virtual register or a fixed physical register. It does not have
+/// any constraints applied to it: those can be added later in
+/// `MachInst::get_operands()` when the `Reg`s are converted to
+/// `Operand`s.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Reg(VReg);
+
+impl Reg {
+    /// Get the physical register (`RealReg`), if this register is
+    /// one.
+    pub fn to_real_reg(self) -> Option<RealReg> {
+        if pinned_vreg_to_preg(self.0).is_some() {
+            Some(RealReg(self.0))
+        } else {
+            None
+        }
+    }
+
+    /// Get the virtual (non-physical) register, if this register is
+    /// one.
+    pub fn to_virtual_reg(self) -> Option<VirtualReg> {
+        if pinned_vreg_to_preg(self.0).is_none() {
+            Some(VirtualReg(self.0))
+        } else {
+            None
+        }
+    }
+
+    /// Get the class of this register.
+    pub fn class(self) -> RegClass {
+        self.0.class()
+    }
+
+    /// Is this a real (physical) reg?
+    pub fn is_real(self) -> bool {
+        self.to_real_reg().is_some()
+    }
+
+    /// Is this a virtual reg?
+    pub fn is_virtual(self) -> bool {
+        self.to_virtual_reg().is_some()
+    }
+}
+
+impl std::fmt::Debug for Reg {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        if let Some(rreg) = self.to_real_reg() {
+            let preg: PReg = rreg.into();
+            write!(f, "{}", preg)
+        } else if let Some(vreg) = self.to_virtual_reg() {
+            let vreg: VReg = vreg.into();
+            write!(f, "{}", vreg)
+        } else {
+            unreachable!()
+        }
+    }
+}
+
+/// A real (physical) register. This corresponds to one of the target
+/// ISA's named registers and can be used as an instruction operand.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct RealReg(VReg);
+
+impl RealReg {
+    /// Get the class of this register.
+    pub fn class(self) -> RegClass {
+        self.0.class()
+    }
+
+    pub fn hw_enc(self) -> u8 {
+        PReg::from(self).hw_enc() as u8
+    }
+}
+
+impl std::fmt::Debug for RealReg {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        Reg::from(*self).fmt(f)
+    }
+}
+
+/// A virtual register. This can be allocated into a real (physical)
+/// register of the appropriate register class, but which one is not
+/// specified. Virtual registers are used when generating `MachInst`s,
+/// before register allocation occurs, in order to allow us to name as
+/// many register-carried values as necessary.
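// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// index-space scheme described above. Vreg indices below PINNED_VREGS
// name physical registers directly; indices at or above it are true
// virtual registers. Hypothetical, simplified (the real code uses
// regalloc2's VReg/PReg types rather than bare indices).

const PINNED: usize = 128;

fn preg_index(vreg_index: usize) -> Option<usize> {
    if vreg_index < PINNED {
        Some(vreg_index) // pinned: the vreg index *is* the preg index
    } else {
        None
    }
}

fn main() {
    assert_eq!(preg_index(3), Some(3)); // a pinned (physical) vreg
    assert_eq!(preg_index(128), None); // the first user vreg
}
// ---------------------------------------------------------------------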
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct VirtualReg(VReg); + +impl VirtualReg { + /// Get the class of this register. + pub fn class(self) -> RegClass { + self.0.class() + } + + pub fn index(self) -> usize { + self.0.vreg() + } +} + +impl std::fmt::Debug for VirtualReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + Reg::from(*self).fmt(f) + } +} + +/// A type wrapper that indicates a register type is writable. The +/// underlying register can be extracted, and the type wrapper can be +/// built using an arbitrary register. Hence, this type-level wrapper +/// is not strictly a guarantee. However, "casting" to a writable +/// register is an explicit operation for which we can +/// audit. Ordinarily, internal APIs in the compiler backend should +/// take a `Writable` whenever the register is written, and the +/// usual, frictionless way to get one of these is to allocate a new +/// temporary. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Writable { + reg: T, +} + +impl Writable { + /// Explicitly construct a `Writable` from a `T`. As noted in + /// the documentation for `Writable`, this is not hidden or + /// disallowed from the outside; anyone can perform the "cast"; + /// but it is explicit so that we can audit the use sites. + pub fn from_reg(reg: T) -> Writable { + Writable { reg } + } + + /// Get the underlying register, which can be read. + pub fn to_reg(self) -> T { + self.reg + } + + /// Map the underlying register to another value or type. + pub fn map(self, f: F) -> Writable + where + U: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash, + F: Fn(T) -> U, + { + Writable { reg: f(self.reg) } + } +} + +// Conversions between regalloc2 types (VReg) and our types +// (VirtualReg, RealReg, Reg). + +impl std::convert::From for Reg { + fn from(vreg: regalloc2::VReg) -> Reg { + Reg(vreg) + } +} + +impl std::convert::From for VirtualReg { + fn from(vreg: regalloc2::VReg) -> VirtualReg { + debug_assert!(pinned_vreg_to_preg(vreg).is_none()); + VirtualReg(vreg) + } +} + +impl std::convert::From for RealReg { + fn from(vreg: regalloc2::VReg) -> RealReg { + debug_assert!(pinned_vreg_to_preg(vreg).is_some()); + RealReg(vreg) + } +} + +impl std::convert::From for regalloc2::VReg { + /// Extract the underlying `regalloc2::VReg`. Note that physical + /// registers also map to particular (special) VRegs, so this + /// method can be used either on virtual or physical `Reg`s. + fn from(reg: Reg) -> regalloc2::VReg { + reg.0 + } +} + +impl std::convert::From for regalloc2::VReg { + fn from(reg: VirtualReg) -> regalloc2::VReg { + reg.0 + } +} + +impl std::convert::From for regalloc2::VReg { + fn from(reg: RealReg) -> regalloc2::VReg { + reg.0 + } +} + +impl std::convert::From for regalloc2::PReg { + fn from(reg: RealReg) -> regalloc2::PReg { + PReg::from_index(reg.0.vreg()) + } +} + +impl std::convert::From for RealReg { + fn from(preg: regalloc2::PReg) -> RealReg { + RealReg(VReg::new(preg.index(), preg.class())) + } +} + +impl std::convert::From for Reg { + fn from(preg: regalloc2::PReg) -> Reg { + Reg(VReg::new(preg.index(), preg.class())) + } +} + +impl std::convert::From for Reg { + fn from(reg: RealReg) -> Reg { + Reg(reg.0) + } +} + +impl std::convert::From for Reg { + fn from(reg: VirtualReg) -> Reg { + Reg(reg.0) + } +} + +/// A spill slot. 
+pub type SpillSlot = regalloc2::SpillSlot;
+
+/// A register class. Each register in the ISA has one class, and the
+/// classes are disjoint. Most modern ISAs will have just two classes:
+/// the integer/general-purpose registers (GPRs), and the float/vector
+/// registers (typically used for both).
+///
+/// Note that unlike some other compiler backend/register allocator
+/// designs, we do not allow for overlapping classes, i.e. registers
+/// that belong to more than one class, because doing so makes the
+/// allocation problem significantly more complex. Instead, when a
+/// register can be addressed under different names for different
+/// sizes (for example), the backend author should pick classes that
+/// denote some fundamental allocation unit that encompasses the whole
+/// register. For example, always allocate 128-bit vector registers
+/// `v0`..`vN`, even though `f32` and `f64` values may use only the
+/// low 32/64 bits of those registers and name them differently.
+pub type RegClass = regalloc2::RegClass;
+
+/// An OperandCollector is a wrapper around a Vec of Operands
+/// (flattened array for a whole sequence of instructions) that
+/// gathers operands from a single instruction and provides the range
+/// in the flattened array.
+#[derive(Debug)]
+pub struct OperandCollector<'a, F: Fn(VReg) -> VReg> {
+    operands: &'a mut Vec<Operand>,
+    operands_start: usize,
+    clobbers: Vec<PReg>,
+    renamer: F,
+}
+
+impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> {
+    /// Start gathering operands into one flattened operand array.
+    pub fn new(operands: &'a mut Vec<Operand>, renamer: F) -> Self {
+        let operands_start = operands.len();
+        Self {
+            operands,
+            operands_start,
+            clobbers: vec![],
+            renamer,
+        }
+    }
+
+    /// Add an operand.
+    fn add_operand(&mut self, operand: Operand) {
+        let vreg = (self.renamer)(operand.vreg());
+        let operand = Operand::new(vreg, operand.constraint(), operand.kind(), operand.pos());
+        self.operands.push(operand);
+    }
+
+    /// Add a clobber.
+    fn add_clobber(&mut self, clobber: PReg) {
+        self.clobbers.push(clobber);
+    }
+
+    /// Finish the operand collection and return the tuple giving the
+    /// range of indices in the flattened operand array, and the
+    /// clobber array.
+    pub fn finish(self) -> ((u32, u32), Vec<PReg>) {
+        let start = self.operands_start as u32;
+        let end = self.operands.len() as u32;
+        ((start, end), self.clobbers)
+    }
+
+    /// Add a register use, at the start of the instruction (`Before`
+    /// position).
+    pub fn reg_use(&mut self, reg: Reg) {
+        self.add_operand(Operand::reg_use(reg.into()));
+    }
+
+    /// Add multiple register uses.
+    pub fn reg_uses(&mut self, regs: &[Reg]) {
+        for &reg in regs {
+            self.reg_use(reg);
+        }
+    }
+
+    /// Add a register def, at the end of the instruction (`After`
+    /// position). Use only when this def will be written after all
+    /// uses are read.
+    pub fn reg_def(&mut self, reg: Writable<Reg>) {
+        self.add_operand(Operand::reg_def(reg.to_reg().into()));
+    }
+
+    /// Add multiple register defs.
+    pub fn reg_defs(&mut self, regs: &[Writable<Reg>]) {
+        for &reg in regs {
+            self.reg_def(reg);
+        }
+    }
+
+    /// Add a register "early def", which logically occurs at the
+    /// beginning of the instruction, alongside all uses. Use this
+    /// when the def may be written before all uses are read; the
+    /// regalloc will ensure that it does not overwrite any uses.
+ pub fn reg_early_def(&mut self, reg: Writable) { + self.add_operand(Operand::reg_def_at_start(reg.to_reg().into())); + } + + /// Add a register "fixed use", which ties a vreg to a particular + /// RealReg at this point. + pub fn reg_fixed_use(&mut self, reg: Reg, rreg: Reg) { + let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg"); + self.add_operand(Operand::reg_fixed_use(reg.into(), rreg.into())); + } + + /// Add a register "fixed def", which ties a vreg to a particular + /// RealReg at this point. + pub fn reg_fixed_def(&mut self, reg: Writable, rreg: Reg) { + let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg"); + self.add_operand(Operand::reg_fixed_def(reg.to_reg().into(), rreg.into())); + } + + /// Add a register def that reuses an earlier use-operand's + /// allocation. The index of that earlier operand (relative to the + /// current instruction's start of operands) must be known. + pub fn reg_reuse_def(&mut self, reg: Writable, idx: usize) { + if reg.to_reg().to_virtual_reg().is_some() { + self.add_operand(Operand::reg_reuse_def(reg.to_reg().into(), idx)); + } else { + // Sometimes destination registers that reuse a source are + // given with RealReg args. In this case, we assume the + // creator of the instruction knows what they are doing + // and just emit a normal def to the pinned vreg. + self.add_operand(Operand::reg_def(reg.to_reg().into())); + } + } + + /// Add a register use+def, or "modify", where the reg must stay + /// in the same register on the input and output side of the + /// instruction. + pub fn reg_mod(&mut self, reg: Writable) { + self.add_operand(Operand::new( + reg.to_reg().into(), + regalloc2::OperandConstraint::Reg, + regalloc2::OperandKind::Mod, + regalloc2::OperandPos::Early, + )); + } + + /// Add a register clobber. This is a register that is written by + /// the instruction, so must be reserved (not used) for the whole + /// instruction, but is not used afterward. + #[allow(dead_code)] // FIXME: use clobbers rather than defs for calls! + pub fn reg_clobber(&mut self, reg: Writable) { + self.add_clobber(PReg::from(reg.to_reg())); + } +} + +/// Use an OperandCollector to count the number of operands on an instruction. +pub fn count_operands(inst: &I) -> usize { + let mut ops = vec![]; + let mut coll = OperandCollector::new(&mut ops, |vreg| vreg); + inst.get_operands(&mut coll); + let ((start, end), _) = coll.finish(); + debug_assert_eq!(0, start); + end as usize +} + +/// Pretty-print part of a disassembly, with knowledge of +/// operand/instruction size, and optionally with regalloc +/// results. This can be used, for example, to print either `rax` or +/// `eax` for the register by those names on x86-64, depending on a +/// 64- or 32-bit context. +pub trait PrettyPrint { + fn pretty_print(&self, size_bytes: u8, allocs: &mut AllocationConsumer<'_>) -> String; + + fn pretty_print_default(&self) -> String { + self.pretty_print(0, &mut AllocationConsumer::new(&[])) + } +} + +/// A consumer of an (optional) list of Allocations along with Regs +/// that provides RealRegs where available. +/// +/// This is meant to be used during code emission or +/// pretty-printing. In at least the latter case, regalloc results may +/// or may not be available, so we may end up printing either vregs or +/// rregs. Even pre-regalloc, though, some registers may be RealRegs +/// that were provided when the instruction was created. 
+/// +/// This struct should be used in a specific way: when matching on an +/// instruction, provide it the Regs in the same order as they were +/// provided to the OperandCollector. +#[derive(Clone)] +pub struct AllocationConsumer<'a> { + allocs: std::slice::Iter<'a, Allocation>, +} + +impl<'a> AllocationConsumer<'a> { + pub fn new(allocs: &'a [Allocation]) -> Self { + Self { + allocs: allocs.iter(), + } + } + + pub fn next(&mut self, pre_regalloc_reg: Reg) -> Reg { + let alloc = self.allocs.next(); + let alloc = alloc.map(|alloc| { + Reg::from( + alloc + .as_reg() + .expect("Should not have gotten a stack allocation"), + ) + }); + + match (pre_regalloc_reg.to_real_reg(), alloc) { + (Some(rreg), None) => rreg.into(), + (Some(rreg), Some(alloc)) => { + debug_assert_eq!(Reg::from(rreg), alloc); + alloc + } + (None, Some(alloc)) => alloc, + _ => pre_regalloc_reg, + } + } + + pub fn next_writable(&mut self, pre_regalloc_reg: Writable) -> Writable { + Writable::from_reg(self.next(pre_regalloc_reg.to_reg())) + } + + pub fn next_n(&mut self, count: usize) -> SmallVec<[Allocation; 4]> { + let mut allocs = smallvec![]; + for _ in 0..count { + if let Some(next) = self.allocs.next() { + allocs.push(*next); + } else { + return allocs; + } + } + allocs + } +} + +impl<'a> std::default::Default for AllocationConsumer<'a> { + fn default() -> Self { + Self { allocs: [].iter() } + } +} diff --git a/cranelift/codegen/src/machinst/regmapping.rs b/cranelift/codegen/src/machinst/regmapping.rs deleted file mode 100644 index 4b51c426bd..0000000000 --- a/cranelift/codegen/src/machinst/regmapping.rs +++ /dev/null @@ -1,108 +0,0 @@ -use crate::ir::Type; -use regalloc::{Reg, RegUsageMapper, Writable}; -use smallvec::SmallVec; -use std::cell::Cell; - -// Define our own register-mapping trait so we can do arbitrary register -// renaming that are more free form than what `regalloc` constrains us to with -// its `RegUsageMapper` trait definition. -pub trait RegMapper { - fn get_use(&self, reg: Reg) -> Option; - fn get_def(&self, reg: Reg) -> Option; - fn get_mod(&self, reg: Reg) -> Option; - - fn map_use(&self, r: &mut Reg) { - if let Some(new) = self.get_use(*r) { - *r = new; - } - } - - fn map_def(&self, r: &mut Writable) { - if let Some(new) = self.get_def(r.to_reg()) { - *r = Writable::from_reg(new); - } - } - - fn map_mod(&self, r: &mut Writable) { - if let Some(new) = self.get_mod(r.to_reg()) { - *r = Writable::from_reg(new); - } - } -} - -impl RegMapper for T -where - T: RegUsageMapper, -{ - fn get_use(&self, reg: Reg) -> Option { - let v = reg.as_virtual_reg()?; - self.get_use(v).map(|r| r.to_reg()) - } - - fn get_def(&self, reg: Reg) -> Option { - let v = reg.as_virtual_reg()?; - self.get_def(v).map(|r| r.to_reg()) - } - - fn get_mod(&self, reg: Reg) -> Option { - let v = reg.as_virtual_reg()?; - self.get_mod(v).map(|r| r.to_reg()) - } -} - -#[derive(Debug, Default)] -pub struct RegRenamer { - // Map of `(old, new, used, ty)` register names. Use a `SmallVec` because - // we typically only have one or two renamings. - // - // The `used` flag indicates whether the mapping has been used for - // `get_def`, later used afterwards during `unmapped_defs` to know what - // moves need to be generated. - renames: SmallVec<[(Reg, Reg, Cell, Type); 2]>, -} - -impl RegRenamer { - /// Adds a new mapping which means that `old` reg should now be called - /// `new`. The type of `old` is `ty` as specified. 
- pub fn add_rename(&mut self, old: Reg, new: Reg, ty: Type) { - self.renames.push((old, new, Cell::new(false), ty)); - } - - fn get_rename(&self, reg: Reg, set_used_def: bool) -> Option { - let (_, new, used_def, _) = self.renames.iter().find(|(old, _, _, _)| reg == *old)?; - used_def.set(used_def.get() || set_used_def); - Some(*new) - } - - /// Returns the list of register mappings, with their type, which were not - /// actually mapped. - /// - /// This list is used because it means that the `old` name for the register - /// was never actually defined, so to correctly rename this register the - /// caller needs to move `old` into `new`. - /// - /// This yields tuples of `(old, new, ty)`. - pub fn unmapped_defs(&self) -> impl Iterator + '_ { - self.renames.iter().filter_map(|(old, new, used_def, ty)| { - if used_def.get() { - None - } else { - Some((*old, *new, *ty)) - } - }) - } -} - -impl RegMapper for RegRenamer { - fn get_use(&self, reg: Reg) -> Option { - self.get_rename(reg, false) - } - - fn get_def(&self, reg: Reg) -> Option { - self.get_rename(reg, true) - } - - fn get_mod(&self, reg: Reg) -> Option { - self.get_rename(reg, false) - } -} diff --git a/cranelift/codegen/src/machinst/valueregs.rs b/cranelift/codegen/src/machinst/valueregs.rs index df18202ccc..52b3c89896 100644 --- a/cranelift/codegen/src/machinst/valueregs.rs +++ b/cranelift/codegen/src/machinst/valueregs.rs @@ -1,7 +1,9 @@ //! Data structure for tracking the (possibly multiple) registers that hold one //! SSA `Value`. -use regalloc::{RealReg, Reg, VirtualReg, Writable}; +use regalloc2::{PReg, VReg}; + +use super::{RealReg, Reg, VirtualReg, Writable}; use std::fmt::Debug; const VALUE_REGS_PARTS: usize = 2; @@ -35,17 +37,17 @@ pub trait InvalidSentinel: Copy + Eq { } impl InvalidSentinel for Reg { fn invalid_sentinel() -> Self { - Reg::invalid() + Reg::from(VReg::invalid()) } } impl InvalidSentinel for VirtualReg { fn invalid_sentinel() -> Self { - VirtualReg::invalid() + VirtualReg::from(VReg::invalid()) } } impl InvalidSentinel for RealReg { fn invalid_sentinel() -> Self { - RealReg::invalid() + RealReg::from(PReg::invalid()) } } impl InvalidSentinel for Writable { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index d1e585c609..fde128088c 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -18,46 +18,46 @@ //! backend pipeline. use crate::fx::FxHashMap; -use crate::ir::{self, types, Constant, ConstantData, SourceLoc}; +use crate::fx::FxHashSet; +use crate::ir::{self, types, Constant, ConstantData, LabelValueLoc, SourceLoc, ValueLabel}; use crate::machinst::*; -use crate::settings; use crate::timing; -use regalloc::Function as RegallocFunction; -use regalloc::Set as RegallocSet; -use regalloc::{ - BlockIx, InstIx, PrettyPrint, Range, RegAllocResult, RegClass, RegUsageCollector, - RegUsageMapper, SpillSlot, StackmapRequestInfo, +use crate::ValueLocRange; +use regalloc2::{ + Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PReg, + RegClass, VReg, }; use alloc::boxed::Box; -use alloc::{borrow::Cow, vec::Vec}; +use alloc::vec::Vec; use cranelift_entity::{entity_impl, Keys, PrimaryMap}; -use std::cell::RefCell; use std::collections::HashMap; use std::fmt; -use std::iter; -use std::string::String; /// Index referring to an instruction in VCode. -pub type InsnIndex = u32; +pub type InsnIndex = regalloc2::Inst; + /// Index referring to a basic block in VCode. 
-pub type BlockIndex = u32; +pub type BlockIndex = regalloc2::Block; /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. pub trait VCodeInst: MachInst + MachInstEmit {} impl VCodeInst for I {} -/// A function in "VCode" (virtualized-register code) form, after lowering. -/// This is essentially a standard CFG of basic blocks, where each basic block -/// consists of lowered instructions produced by the machine-specific backend. +/// A function in "VCode" (virtualized-register code) form, after +/// lowering. This is essentially a standard CFG of basic blocks, +/// where each basic block consists of lowered instructions produced +/// by the machine-specific backend. +/// +/// Note that the VCode is immutable once produced, and is not +/// modified by register allocation in particular. Rather, register +/// allocation on the `VCode` produces a separate `regalloc2::Output` +/// struct, and this can be passed to `emit`. `emit` in turn does not +/// modify the vcode, but produces an `EmitResult`, which contains the +/// machine code itself, and the associated disassembly and/or +/// metadata as requested. pub struct VCode { - /// Function liveins. - liveins: RegallocSet, - - /// Function liveouts. - liveouts: RegallocSet, - /// VReg IR-level types. vreg_types: Vec, @@ -67,6 +67,27 @@ pub struct VCode { /// Lowered machine instructions in order corresponding to the original IR. insts: Vec, + /// Operands: pre-regalloc references to virtual registers with + /// constraints, in one flattened array. This allows the regalloc + /// to efficiently access all operands without requiring expensive + /// matches or method invocations on insts. + operands: Vec, + + /// Operand index ranges: for each instruction in `insts`, there + /// is a tuple here providing the range in `operands` for that + /// instruction's operands. + operand_ranges: Vec<(u32, u32)>, + + /// Clobbers: a sparse map from instruction indices to clobber lists. + clobber_ranges: FxHashMap, + + /// A flat list of clobbered registers, with index ranges held by + /// `clobber_ranges`. + clobbers: Vec, + + /// Move information: for a given InsnIndex, (src, dst) operand pair. + is_move: FxHashMap, + /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is /// reasonable to keep one of these per instruction.) srclocs: Vec, @@ -77,13 +98,63 @@ pub struct VCode { /// Block instruction indices. block_ranges: Vec<(InsnIndex, InsnIndex)>, - /// Block successors: index range in the successor-list below. - block_succ_range: Vec<(usize, usize)>, + /// Block successors: index range in the `block_succs_preds` list. + block_succ_range: Vec<(u32, u32)>, - /// Block successor lists, concatenated into one Vec. The `block_succ_range` - /// list of tuples above gives (start, end) ranges within this list that - /// correspond to each basic block's successors. - block_succs: Vec, + /// Block predecessors: index range in the `block_succs_preds` list. + block_pred_range: Vec<(u32, u32)>, + + /// Block successor and predecessor lists, concatenated into one + /// Vec. The `block_succ_range` and `block_pred_range` lists of + /// tuples above give (start, end) ranges within this list that + /// correspond to each basic block's successors or predecessors, + /// respectively. + block_succs_preds: Vec, + + /// Block parameters: index range in `block_params` below. 
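(An editorial aside, not part of the patch; the field list continues below. The immutability described in the struct doc above implies roughly the following driver shape. This is a sketch only: `regalloc2::run` and the `MachineEnv` plumbing are approximate, and the exact entry point and options differ across regalloc2 versions.)

fn compile_vcode<I: VCodeInst>(
    vcode: VCode<I>,
    machine_env: &regalloc2::MachineEnv,
) -> EmitResult<I> {
    // VCode implements regalloc2::Function, so it can be handed to the
    // allocator as-is; the returned Output lives beside the VCode rather
    // than being spliced into it.
    let output = regalloc2::run(&vcode, machine_env).expect("regalloc");
    // Emission consumes the VCode and pairs it with the regalloc output.
    vcode.emit(&output, /* want_disasm = */ false, /* want_metadata = */ true)
}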
+ block_params_range: Vec<(u32, u32)>, + + /// Block parameter lists, concatenated into one vec. The + /// `block_params_range` list of tuples above gives (start, end) + /// ranges within this list that correspond to each basic block's + /// blockparam vregs. + block_params: Vec, + + /// Outgoing block arguments on branch instructions, concatenated + /// into one list. + /// + /// Note that this is conceptually a 3D array: we have a VReg list + /// per block, per successor. We flatten those three dimensions + /// into this 1D vec, then store index ranges in two levels of + /// indirection. + /// + /// Indexed by the indices in `branch_block_arg_succ_range`. + branch_block_args: Vec, + + /// Array of sequences of (start, end) tuples in + /// `branch_block_args`, one for each successor; these sequences + /// for each block are concatenated. + /// + /// Indexed by the indices in `branch_block_arg_succ_range`. + branch_block_arg_range: Vec<(u32, u32)>, + + /// For a given block, indices in `branch_block_arg_range` + /// corresponding to all of its successors. + branch_block_arg_succ_range: Vec<(u32, u32)>, + + /// VReg aliases. Each key in this table is translated to its + /// value when gathering Operands from instructions. Aliases are + /// not chased transitively (we do not further look up the + /// translated reg to see if it is another alias). + /// + /// We use these aliases to rename an instruction's expected + /// result vregs to the returned vregs from lowering, which are + /// usually freshly-allocated temps. + /// + /// Operands and branch arguments will already have been + /// translated through this alias table; but it helps to make + /// sense of instructions when pretty-printed, for example. + vreg_aliases: FxHashMap, /// Block-order information. block_order: BlockLoweringOrder, @@ -95,68 +166,113 @@ pub struct VCode { /// immutable across function compilations within the same module. emit_info: I::Info, - /// Safepoint instruction indices. Filled in post-regalloc. (Prior to - /// regalloc, the safepoint instructions are listed in the separate - /// `StackmapRequestInfo` held separate from the `VCode`.) - safepoint_insns: Vec, + /// Reference-typed `regalloc2::VReg`s. The regalloc requires + /// these in a dense slice (as opposed to querying the + /// reftype-status of each vreg) for efficient iteration. + reftyped_vregs: Vec, - /// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s. - /// These are used to generate actual stack maps at emission. Filled in - /// post-regalloc. - safepoint_slots: Vec>, - - /// Do we generate debug info? - generate_debug_info: bool, - - /// Instruction end offsets, instruction indices at each label, - /// total buffer size, and start of cold code. Only present if - /// `generate_debug_info` is set. - insts_layout: RefCell, + /// A set with the same contents as `reftyped_vregs`, in order to + /// avoid inserting more than once. + reftyped_vregs_set: FxHashSet, /// Constants. constants: VCodeConstants, - /// Are any debug value-labels present? If not, we can skip the - /// post-emission analysis. - has_value_labels: bool, + /// Value labels for debuginfo attached to vregs. + debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, } -#[derive(Debug, Default)] -pub(crate) struct InstsLayoutInfo { - pub(crate) inst_end_offsets: Vec, - pub(crate) label_inst_indices: Vec, - pub(crate) start_of_cold_code: Option, +/// The result of `VCode::emit`. 
Contains all information computed
+/// during emission: actual machine code, optionally a disassembly,
+/// and optionally metadata about the code layout.
+pub struct EmitResult<I: VCodeInst> {
+    /// The MachBuffer containing the machine code.
+    pub buffer: MachBuffer<I>,
+
+    /// Offset of each basic block, recorded during emission. Computed
+    /// only if `debug_value_labels` is non-empty.
+    pub bb_offsets: Vec<CodeOffset>,
+
+    /// Final basic-block edges, in terms of code offsets of
+    /// bb-starts. Computed only if `debug_value_labels` is non-empty.
+    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
+
+    /// Final instruction offsets, recorded during emission. Computed
+    /// only if `debug_value_labels` is non-empty.
+    pub inst_offsets: Vec<CodeOffset>,
+
+    /// Final length of function body.
+    pub func_body_len: CodeOffset,
+
+    /// The pretty-printed disassembly, if any. This uses the same
+    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
+    /// implementation, but additionally includes the prologue and
+    /// epilogue(s), and makes use of the regalloc results.
+    pub disasm: Option<String>,
+
+    /// Offsets of stackslots.
+    pub stackslot_offsets: PrimaryMap<StackSlot, u32>,
+
+    /// Value-labels information (debug metadata).
+    pub value_labels_ranges: ValueLabelsRanges,
+
+    /// Stack frame size.
+    pub frame_size: u32,
+}

-/// A builder for a VCode function body. This builder is designed for the
-/// lowering approach that we take: we traverse basic blocks in forward
-/// (original IR) order, but within each basic block, we generate code from
-/// bottom to top; and within each IR instruction that we visit in this reverse
-/// order, we emit machine instructions in *forward* order again.
+/// A builder for a VCode function body.
 ///
-/// Hence, to produce the final instructions in proper order, we perform two
-/// swaps. First, the machine instructions (`I` instances) are produced in
-/// forward order for an individual IR instruction. Then these are *reversed*
-/// and concatenated to `bb_insns` at the end of the IR instruction lowering.
-/// The `bb_insns` vec will thus contain all machine instructions for a basic
-/// block, in reverse order. Finally, when we're done with a basic block, we
-/// reverse the whole block's vec of instructions again, and concatenate onto
-/// the VCode's insts.
+/// This builder has the ability to accept instructions in either
+/// forward or reverse order, depending on the pass direction that
+/// produces the VCode. The lowering from CLIF to VCode
+/// ordinarily occurs in reverse order (in order to allow instructions
+/// to be lowered only if used, and not merged) so a reversal will
+/// occur at the end of lowering to ensure the VCode is in machine
+/// order.
+///
+/// If built in reverse, block and instruction indices used once the
+/// VCode is built are relative to the final (reversed) order, not the
+/// order of construction. Note that this means we do not know the
+/// final block or instruction indices when building, so we do not
+/// hand them out. (The user is assumed to know them when appending
+/// terminator instructions with successor blocks.)
 pub struct VCodeBuilder<I: VCodeInst> {
     /// In-progress VCode.
     vcode: VCode<I>,

-    /// In-progress stack map-request info.
-    stack_map_info: StackmapRequestInfo,
+    /// In what direction is the build occurring?
+    direction: VCodeBuildDirection,

     /// Index of the last block-start in the vcode.
-    block_start: InsnIndex,
+    block_start: usize,

     /// Start of succs for the current block in the concatenated succs list.
     succ_start: usize,

+    /// Start of blockparams for the current block in the concatenated
+    /// blockparams list.
+    block_params_start: usize,
+
+    /// Start of successor blockparam arg list entries in
+    /// the concatenated branch_block_arg_range list.
+    branch_block_arg_succ_start: usize,
+
     /// Current source location.
     cur_srcloc: SourceLoc,
+
+    /// Debug-value label in-progress map, keyed by label. For each
+    /// label, we keep disjoint ranges mapping to vregs. We'll flatten
+    /// this into (vreg, range, label) tuples when done.
+    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
+}
+
+/// Direction in which a VCodeBuilder builds VCode.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum VCodeBuildDirection {
+    // TODO: add `Forward` once we need it and can test it adequately.
+    /// Backward-build pass: we expect the producer to call `emit()`
+    /// with instructions in reverse program order within each block.
+    Backward,
+}

 impl<I: VCodeInst> VCodeBuilder<I> {
@@ -166,27 +282,19 @@ impl<I: VCodeInst> VCodeBuilder<I> {
         emit_info: I::Info,
         block_order: BlockLoweringOrder,
         constants: VCodeConstants,
+        direction: VCodeBuildDirection,
     ) -> VCodeBuilder<I> {
-        let reftype_class = I::ref_type_regclass(abi.flags());
-        let vcode = VCode::new(
-            abi,
-            emit_info,
-            block_order,
-            constants,
-            /* generate_debug_info = */ true,
-        );
-        let stack_map_info = StackmapRequestInfo {
-            reftype_class,
-            reftyped_vregs: vec![],
-            safepoint_insns: vec![],
-        };
+        let vcode = VCode::new(abi, emit_info, block_order, constants);

         VCodeBuilder {
             vcode,
-            stack_map_info,
+            direction,
             block_start: 0,
             succ_start: 0,
+            block_params_start: 0,
+            branch_block_arg_succ_start: 0,
             cur_srcloc: SourceLoc::default(),
+            debug_info: FxHashMap::default(),
         }
     }

@@ -202,18 +310,26 @@ impl<I: VCodeInst> VCodeBuilder<I> {
     /// Set the type of a VReg.
     pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
-        if self.vcode.vreg_types.len() <= vreg.get_index() {
+        if self.vcode.vreg_types.len() <= vreg.index() {
             self.vcode
                 .vreg_types
-                .resize(vreg.get_index() + 1, ir::types::I8);
+                .resize(vreg.index() + 1, ir::types::I8);
         }
-        self.vcode.vreg_types[vreg.get_index()] = ty;
+        self.vcode.vreg_types[vreg.index()] = ty;
         if is_reftype(ty) {
-            self.stack_map_info.reftyped_vregs.push(vreg);
+            let vreg: VReg = vreg.into();
+            if self.vcode.reftyped_vregs_set.insert(vreg) {
+                self.vcode.reftyped_vregs.push(vreg);
+            }
             self.vcode.have_ref_values = true;
         }
     }

+    /// Get the type of a VReg.
+    pub fn get_vreg_type(&self, vreg: VirtualReg) -> Type {
+        self.vcode.vreg_types[vreg.index()]
+    }
+
     /// Set the current block as the entry block.
     pub fn set_entry(&mut self, block: BlockIndex) {
         self.vcode.entry = block;
@@ -223,47 +339,78 @@ impl<I: VCodeInst> VCodeBuilder<I> {
     /// for IR insts and prior to ending the function (building the VCode).
     pub fn end_bb(&mut self) {
         let start_idx = self.block_start;
-        let end_idx = self.vcode.insts.len() as InsnIndex;
+        let end_idx = self.vcode.insts.len();
         self.block_start = end_idx;
         // Add the instruction index range to the list of blocks.
-        self.vcode.block_ranges.push((start_idx, end_idx));
+        self.vcode
+            .block_ranges
+            .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx)));
         // End the successors list.
-        let succ_end = self.vcode.block_succs.len();
+        let succ_end = self.vcode.block_succs_preds.len();
         self.vcode
             .block_succ_range
-            .push((self.succ_start, succ_end));
+            .push((self.succ_start as u32, succ_end as u32));
         self.succ_start = succ_end;
+        // End the blockparams list.
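(An editorial aside, not part of the patch: the blockparams bookkeeping announced by the comment above continues directly below. Every `(start, end)` pair pushed in `end_bb` uses the same flattened-range idiom: per-block tuples index into one concatenated vector, avoiding a Vec-of-Vecs. A generic sketch of the read side, with hypothetical names:)

fn entries_for<'a, T>(block: usize, ranges: &[(u32, u32)], flat: &'a [T]) -> &'a [T] {
    // `ranges[block]` is an inclusive-start, exclusive-end pair into `flat`.
    let (start, end) = ranges[block];
    &flat[start as usize..end as usize]
}

This layout keeps per-block metadata contiguous in memory, which is also why regalloc2 can iterate operands and successors cheaply.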
+        let block_params_end = self.vcode.block_params.len();
+        self.vcode
+            .block_params_range
+            .push((self.block_params_start as u32, block_params_end as u32));
+        self.block_params_start = block_params_end;
+        // End the branch blockparam args list.
+        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
+        self.vcode.branch_block_arg_succ_range.push((
+            self.branch_block_arg_succ_start as u32,
+            branch_block_arg_succ_end as u32,
+        ));
+        self.branch_block_arg_succ_start = branch_block_arg_succ_end;
     }

-    /// Push an instruction for the current BB and current IR inst within the BB.
-    pub fn push(&mut self, insn: I, is_safepoint: bool) {
+    pub fn add_block_param(&mut self, param: VirtualReg, ty: Type) {
+        self.set_vreg_type(param, ty);
+        self.vcode.block_params.push(param.into());
+    }
+
+    pub fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
+        let start = self.vcode.branch_block_args.len();
+        self.vcode
+            .branch_block_args
+            .extend(args.iter().map(|&arg| VReg::from(arg)));
+        let end = self.vcode.branch_block_args.len();
+        self.vcode
+            .branch_block_arg_range
+            .push((start as u32, end as u32));
+    }
+
+    /// Push an instruction for the current BB and current IR inst
+    /// within the BB.
+    pub fn push(&mut self, insn: I) {
         match insn.is_term() {
             MachTerminator::None | MachTerminator::Ret => {}
             MachTerminator::Uncond(target) => {
-                self.vcode.block_succs.push(BlockIx::new(target.get()));
+                self.vcode
+                    .block_succs_preds
+                    .push(BlockIndex::new(target.get() as usize));
             }
             MachTerminator::Cond(true_branch, false_branch) => {
-                self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
                 self.vcode
-                    .block_succs
-                    .push(BlockIx::new(false_branch.get()));
+                    .block_succs_preds
+                    .push(BlockIndex::new(true_branch.get() as usize));
+                self.vcode
+                    .block_succs_preds
+                    .push(BlockIndex::new(false_branch.get() as usize));
             }
             MachTerminator::Indirect(targets) => {
                 for target in targets {
-                    self.vcode.block_succs.push(BlockIx::new(target.get()));
+                    self.vcode
+                        .block_succs_preds
+                        .push(BlockIndex::new(target.get() as usize));
                }
            }
        }
-        if insn.defines_value_label().is_some() {
-            self.vcode.has_value_labels = true;
-        }
+
         self.vcode.insts.push(insn);
         self.vcode.srclocs.push(self.cur_srcloc);
-        if is_safepoint {
-            self.stack_map_info
-                .safepoint_insns
-                .push(InstIx::new((self.vcode.insts.len() - 1) as u32));
-        }
     }

     /// Set the current source location.
@@ -271,26 +418,210 @@ impl<I: VCodeInst> VCodeBuilder<I> {
         self.cur_srcloc = srcloc;
     }

+    /// Add a debug value label to a register.
+    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
+        // We'll fix up labels in reverse(). Because we're generating
+        // code bottom-to-top, the liverange of the label goes *from*
+        // the last index at which it was defined (or 0, which is the
+        // end of the eventual function) *to* just this instruction,
+        // and no further.
+        let inst = InsnIndex::new(self.vcode.insts.len());
+        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
+        let last = labels
+            .last()
+            .map(|(_start, end, _vreg)| *end)
+            .unwrap_or(InsnIndex::new(0));
+        labels.push((last, inst, reg.into()));
+    }
+
+    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
+        let from = from.into();
+        let resolved_to = self.resolve_vreg_alias(to.into());
+        // Disallow cycles (see below).
+ assert_ne!(resolved_to, from); + self.vcode.vreg_aliases.insert(from, resolved_to); + } + + pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg { + Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from) + } + + fn resolve_vreg_alias_impl( + aliases: &FxHashMap, + from: regalloc2::VReg, + ) -> regalloc2::VReg { + // We prevent cycles from existing by resolving targets of + // aliases eagerly before setting them. If the target resolves + // to the origin of the alias, then a cycle would be created + // and the alias is disallowed. Because of the structure of + // SSA code (one instruction can refer to another's defs but + // not vice-versa, except indirectly through + // phis/blockparams), cycles should not occur as we use + // aliases to redirect vregs to the temps that actually define + // them. + + let mut vreg = from; + while let Some(to) = aliases.get(&vreg) { + vreg = *to; + } + vreg + } + /// Access the constants. pub fn constants(&mut self) -> &mut VCodeConstants { &mut self.vcode.constants } - /// Build the final VCode, returning the vcode itself as well as auxiliary - /// information, such as the stack map request information. - pub fn build(self) -> (VCode, StackmapRequestInfo) { - // TODO: come up with an abstraction for "vcode and auxiliary data". The - // auxiliary data needs to be separate from the vcode so that it can be - // referenced as the vcode is mutated (e.g. by the register allocator). - (self.vcode, self.stack_map_info) - } -} + fn compute_preds_from_succs(&mut self) { + // Compute predecessors from successors. In order to gather + // all preds for a block into a contiguous sequence, we build + // a list of (succ, pred) tuples and then sort. + let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> = + Vec::with_capacity(self.vcode.block_succs_preds.len()); + for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() { + let pred = BlockIndex::new(pred); + for i in start..end { + let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index()); + succ_pred_edges.push((succ, pred)); + } + } + succ_pred_edges.sort_unstable(); -fn is_redundant_move(insn: &I) -> bool { - if let Some((to, from)) = insn.is_move() { - to.to_reg() == from - } else { - false + let mut i = 0; + for succ in 0..self.vcode.num_blocks() { + let succ = BlockIndex::new(succ); + let start = self.vcode.block_succs_preds.len(); + while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ { + let pred = succ_pred_edges[i].1; + self.vcode.block_succs_preds.push(pred); + i += 1; + } + let end = self.vcode.block_succs_preds.len(); + self.vcode.block_pred_range.push((start as u32, end as u32)); + } + } + + /// Called once, when a build in Backward order is complete, to + /// perform the overall reversal (into final forward order) and + /// finalize metadata accordingly. + fn reverse_and_finalize(&mut self) { + let n_insts = self.vcode.insts.len(); + if n_insts == 0 { + return; + } + + // Reverse the per-block and per-inst sequences. + self.vcode.block_ranges.reverse(); + // block_params_range is indexed by block (and blocks were + // traversed in reverse) so we reverse it; but block-param + // sequences in the concatenated vec can remain in reverse + // order (it is effectively an arena of arbitrarily-placed + // referenced sequences). + self.vcode.block_params_range.reverse(); + // Likewise, we reverse block_succ_range, but the block_succ + // concatenated array can remain as-is. 
+ self.vcode.block_succ_range.reverse(); + self.vcode.insts.reverse(); + self.vcode.srclocs.reverse(); + // Likewise, branch_block_arg_succ_range is indexed by block + // so must be reversed. + self.vcode.branch_block_arg_succ_range.reverse(); + + // To translate an instruction index *endpoint* in reversed + // order to forward order, compute `n_insts - i`. + // + // Why not `n_insts - 1 - i`? That would be correct to + // translate an individual instruction index (for ten insts 0 + // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes + // 0). But for the usual inclusive-start, exclusive-end range + // idiom, inclusive starts become exclusive ends and + // vice-versa, so e.g. an (inclusive) start of 0 becomes an + // (exclusive) end of 10. + let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); + + // Edit the block-range instruction indices. + for tuple in &mut self.vcode.block_ranges { + let (start, end) = *tuple; + *tuple = (translate(end), translate(start)); // Note reversed order. + } + + // Generate debug-value labels based on per-label maps. + for (label, tuples) in &self.debug_info { + for &(start, end, vreg) in tuples { + let vreg = self.resolve_vreg_alias(vreg); + let fwd_start = translate(end); + let fwd_end = translate(start); + self.vcode + .debug_value_labels + .push((vreg, fwd_start, fwd_end, label.as_u32())); + } + } + + // Now sort debug value labels by VReg, as required + // by regalloc2. + self.vcode + .debug_value_labels + .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); + } + + fn collect_operands(&mut self) { + for (i, insn) in self.vcode.insts.iter().enumerate() { + // Push operands from the instruction onto the operand list. + // + // We rename through the vreg alias table as we collect + // the operands. This is better than a separate post-pass + // over operands, because it has more cache locality: + // operands only need to pass through L1 once. This is + // also better than renaming instructions' + // operands/registers while lowering, because here we only + // need to do the `match` over the instruction to visit + // its register fields (which is slow, branchy code) once. + + let vreg_aliases = &self.vcode.vreg_aliases; + let mut op_collector = OperandCollector::new(&mut self.vcode.operands, |vreg| { + Self::resolve_vreg_alias_impl(vreg_aliases, vreg) + }); + insn.get_operands(&mut op_collector); + let (ops, clobbers) = op_collector.finish(); + self.vcode.operand_ranges.push(ops); + + if !clobbers.is_empty() { + let start = self.vcode.clobbers.len(); + self.vcode.clobbers.extend(clobbers.into_iter()); + let end = self.vcode.clobbers.len(); + self.vcode + .clobber_ranges + .insert(InsnIndex::new(i), (start as u32, end as u32)); + } + + if let Some((dst, src)) = insn.is_move() { + let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into())); + let dst = Operand::reg_def(Self::resolve_vreg_alias_impl( + vreg_aliases, + dst.to_reg().into(), + )); + // Note that regalloc2 requires these in (src, dst) order. + self.vcode.is_move.insert(InsnIndex::new(i), (src, dst)); + } + } + + // Translate blockparam args via the vreg aliases table as well. + for arg in &mut self.vcode.branch_block_args { + let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg); + log::trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); + *arg = new_arg; + } + } + + /// Build the final VCode. 
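(A worked example for the endpoint translation explained above, not part of the patch: with `n_insts = 10`, a block occupying reversed indices `[2, 5)` maps to forward indices `[10 - 5, 10 - 2) = [5, 8)`. Inclusive starts and exclusive ends swap roles under reversal, which is exactly why no `- 1` appears.)

fn translate_range(n_insts: usize, rev: (usize, usize)) -> (usize, usize) {
    // Endpoints, not individual indices, translate as `n_insts - i`;
    // start and end swap because the order is reversed.
    (n_insts - rev.1, n_insts - rev.0)
}
// translate_range(10, (2, 5)) == (5, 8)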
+    pub fn build(mut self) -> VCode<I> {
+        if self.direction == VCodeBuildDirection::Backward {
+            self.reverse_and_finalize();
+        }
+        self.collect_operands();
+        self.compute_preds_from_succs();
+        self.vcode.debug_value_labels.sort_unstable();
+        self.vcode
+    }
 }

@@ -306,228 +637,222 @@ impl<I: VCodeInst> VCode<I> {
         emit_info: I::Info,
         block_order: BlockLoweringOrder,
         constants: VCodeConstants,
-        generate_debug_info: bool,
     ) -> VCode<I> {
+        let n_blocks = block_order.lowered_order().len();
         VCode {
-            liveins: abi.liveins(),
-            liveouts: abi.liveouts(),
             vreg_types: vec![],
             have_ref_values: false,
-            insts: vec![],
-            srclocs: vec![],
-            entry: 0,
-            block_ranges: vec![],
-            block_succ_range: vec![],
-            block_succs: vec![],
+            insts: Vec::with_capacity(10 * n_blocks),
+            operands: Vec::with_capacity(30 * n_blocks),
+            operand_ranges: Vec::with_capacity(10 * n_blocks),
+            clobber_ranges: FxHashMap::default(),
+            clobbers: vec![],
+            is_move: FxHashMap::default(),
+            srclocs: Vec::with_capacity(10 * n_blocks),
+            entry: BlockIndex::new(0),
+            block_ranges: Vec::with_capacity(n_blocks),
+            block_succ_range: Vec::with_capacity(n_blocks),
+            block_succs_preds: Vec::with_capacity(2 * n_blocks),
+            block_pred_range: Vec::with_capacity(n_blocks),
+            block_params_range: Vec::with_capacity(n_blocks),
+            block_params: Vec::with_capacity(5 * n_blocks),
+            branch_block_args: Vec::with_capacity(10 * n_blocks),
+            branch_block_arg_range: Vec::with_capacity(2 * n_blocks),
+            branch_block_arg_succ_range: Vec::with_capacity(n_blocks),
             block_order,
             abi,
             emit_info,
-            safepoint_insns: vec![],
-            safepoint_slots: vec![],
-            generate_debug_info,
-            insts_layout: RefCell::new(Default::default()),
+            reftyped_vregs: vec![],
+            reftyped_vregs_set: FxHashSet::default(),
             constants,
-            has_value_labels: false,
+            debug_value_labels: vec![],
+            vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()),
         }
     }

-    /// Returns the flags controlling this function's compilation.
-    pub fn flags(&self) -> &settings::Flags {
-        self.abi.flags()
-    }
-
-    /// Get the IR-level type of a VReg.
-    pub fn vreg_type(&self, vreg: VirtualReg) -> Type {
-        self.vreg_types[vreg.get_index()]
-    }
-
     /// Get the number of blocks. Block indices will be in the range `0 ..
     /// (self.num_blocks() - 1)`.
     pub fn num_blocks(&self) -> usize {
         self.block_ranges.len()
     }

-    /// Stack frame size for the full function's body.
-    pub fn frame_size(&self) -> u32 {
-        self.abi.frame_size()
-    }
-
     /// Get the successors for a block.
-    pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
-        let (start, end) = self.block_succ_range[block as usize];
-        &self.block_succs[start..end]
+    pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
+        let (start, end) = self.block_succ_range[block.index()];
+        &self.block_succs_preds[start as usize..end as usize]
     }

-    /// Take the results of register allocation, with a sequence of
-    /// instructions including spliced fill/reload/move instructions, and replace
-    /// the VCode with them.
-    pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
-        // Record the spillslot count and clobbered registers for the ABI/stack
-        // setup code.
-        self.abi.set_num_spillslots(result.num_spill_slots as usize);
-        self.abi
-            .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));
+    fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
+        // Compute clobbered registers.
+ let mut clobbered = vec![]; + let mut clobbered_set = FxHashSet::default(); - let mut final_insns = vec![]; - let mut final_block_ranges = vec![(0, 0); self.num_blocks()]; - let mut final_srclocs = vec![]; - let mut final_safepoint_insns = vec![]; - let mut safept_idx = 0; - - assert!(result.target_map.elems().len() == self.num_blocks()); - for block in 0..self.num_blocks() { - let start = result.target_map.elems()[block].get() as usize; - let end = if block == self.num_blocks() - 1 { - result.insns.len() - } else { - result.target_map.elems()[block + 1].get() as usize - }; - let block = block as BlockIndex; - let final_start = final_insns.len() as InsnIndex; - - if block == self.entry { - // Start with the prologue. - let prologue = self.abi.gen_prologue(); - let len = prologue.len(); - final_insns.extend(prologue.into_iter()); - final_srclocs.extend(iter::repeat(SourceLoc::default()).take(len)); - } - - for i in start..end { - let insn = &result.insns[i]; - - // Elide redundant moves at this point (we only know what is - // redundant once registers are allocated). - if is_redundant_move(insn) { - continue; - } - - // Is there a srcloc associated with this insn? Look it up based on original - // instruction index (if new insn corresponds to some original insn, i.e., is not - // an inserted load/spill/move). - let orig_iix = result.orig_insn_map[InstIx::new(i as u32)]; - let srcloc = if orig_iix.is_invalid() { - SourceLoc::default() - } else { - self.srclocs[orig_iix.get() as usize] - }; - - // Whenever encountering a return instruction, replace it - // with the epilogue. - let is_ret = insn.is_term() == MachTerminator::Ret; - if is_ret { - let epilogue = self.abi.gen_epilogue(); - let len = epilogue.len(); - final_insns.extend(epilogue.into_iter()); - final_srclocs.extend(iter::repeat(srcloc).take(len)); - } else { - final_insns.push(insn.clone()); - final_srclocs.push(srcloc); - } - - // Was this instruction a safepoint instruction? Add its final - // index to the safepoint insn-index list if so. - if safept_idx < result.new_safepoint_insns.len() - && (result.new_safepoint_insns[safept_idx].get() as usize) == i - { - let idx = final_insns.len() - 1; - final_safepoint_insns.push(idx as InsnIndex); - safept_idx += 1; + // All moves are included in clobbers. + for edit in ®alloc.edits { + let Edit::Move { to, .. } = edit.1; + if let Some(preg) = to.as_reg() { + let reg = RealReg::from(preg); + if clobbered_set.insert(reg) { + clobbered.push(Writable::from_reg(reg)); } } - - let final_end = final_insns.len() as InsnIndex; - final_block_ranges[block as usize] = (final_start, final_end); } - debug_assert!(final_insns.len() == final_srclocs.len()); + for (i, (start, end)) in self.operand_ranges.iter().enumerate() { + // Skip this instruction if not "included in clobbers" as + // per the MachInst. (Some backends use this to implement + // ABI specifics; e.g., excluding calls of the same ABI as + // the current function from clobbers, because by + // definition everything clobbered by the call can be + // clobbered by this function without saving as well.) 
+            if !self.insts[i].is_included_in_clobbers() {
+                continue;
+            }

-        self.insts = final_insns;
-        self.srclocs = final_srclocs;
-        self.block_ranges = final_block_ranges;
-        self.safepoint_insns = final_safepoint_insns;
+            let start = *start as usize;
+            let end = *end as usize;
+            let operands = &self.operands[start..end];
+            let allocs = &regalloc.allocs[start..end];
+            for (operand, alloc) in operands.iter().zip(allocs.iter()) {
+                // We're interested only in writes (Mods or Defs).
+                if operand.kind() == OperandKind::Use {
+                    continue;
+                }
+                if let Some(preg) = alloc.as_reg() {
+                    let reg = RealReg::from(preg);
+                    if clobbered_set.insert(reg) {
+                        clobbered.push(Writable::from_reg(reg));
+                    }
+                }
+            }

-        // Save safepoint slot-lists. These will be passed to the `EmitState`
-        // for the machine backend during emission so that it can do
-        // target-specific translations of slot numbers to stack offsets.
-        self.safepoint_slots = result.stackmaps;
+            // Also add explicitly-clobbered registers.
+            if let Some(&(start, end)) = self.clobber_ranges.get(&InsnIndex::new(i)) {
+                let inst_clobbers = &self.clobbers[(start as usize)..(end as usize)];
+                for &preg in inst_clobbers {
+                    let reg = RealReg::from(preg);
+                    if clobbered_set.insert(reg) {
+                        clobbered.push(Writable::from_reg(reg));
+                    }
+                }
+            }
+        }
+
+        clobbered
     }

-    /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
-    /// reloc/trap/etc. records ready for use.
+    /// Emit the instructions to a `MachBuffer`, containing fixed-up
+    /// code and external reloc/trap/etc. records ready for use. Takes
+    /// the regalloc results as well.
+    ///
+    /// Returns the machine code itself, and optionally metadata
+    /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
+    /// is consumed by the emission process.
     pub fn emit(
-        &self,
-    ) -> (
-        MachBuffer<I>,
-        Vec<CodeOffset>,
-        Vec<(CodeOffset, CodeOffset)>,
-    )
+        mut self,
+        regalloc: &regalloc2::Output,
+        want_disasm: bool,
+        want_metadata: bool,
+    ) -> EmitResult<I>
     where
         I: MachInstEmit,
     {
+        // To write into disasm string.
+        use core::fmt::Write;
+
         let _tt = timing::vcode_emit();
         let mut buffer = MachBuffer::new();
-        let mut state = I::State::new(&*self.abi);
-        let cfg_metadata = self.flags().machine_code_cfg_info();
         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];

         // The first M MachLabels are reserved for block indices, the next N MachLabels for
         // constants.
-        buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex);
+        buffer.reserve_labels_for_blocks(self.num_blocks());
         buffer.reserve_labels_for_constants(&self.constants);

-        let mut inst_end_offsets = vec![0; self.insts.len()];
-        let mut label_inst_indices = vec![0; self.num_blocks()];
-
-        // Map from instruction index to index in
-        // `safepoint_slots`. We need this because we emit
-        // instructions out-of-order, while the safepoint_insns /
-        // safepoint_slots data structures are sorted in instruction
-        // order.
-        let mut safepoint_indices: FxHashMap<InsnIndex, usize> = FxHashMap::default();
-        for (safepoint_idx, iix) in self.safepoint_insns.iter().enumerate() {
-            // Disregard safepoints that ended up having no live refs.
-            if self.safepoint_slots[safepoint_idx].len() > 0 {
-                safepoint_indices.insert(*iix, safepoint_idx);
-            }
-        }
-
         // Construct the final order we emit code in: cold blocks at the end.
let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; for block in 0..self.num_blocks() { - let block = block as BlockIndex; + let block = BlockIndex::new(block); if self.block_order.is_cold(block) { cold_blocks.push(block); } else { final_order.push(block); } } - let first_cold_block = cold_blocks.first().cloned(); final_order.extend(cold_blocks.clone()); + // Compute/save info we need for the prologue: clobbers and + // number of spillslots. + // + // We clone `abi` here because we will mutate it as we + // generate the prologue and set other info, but we can't + // mutate `VCode`. The info it usually carries prior to + // setting clobbers is fairly minimal so this should be + // relatively cheap. + let clobbers = self.compute_clobbers(regalloc); + self.abi.set_num_spillslots(regalloc.num_spillslots); + self.abi.set_clobbered(clobbers); + + // We need to generate the prologue in order to get the ABI + // object into the right state first. We'll emit it when we + // hit the right block below. + let prologue_insts = self.abi.gen_prologue(); + // Emit blocks. let mut cur_srcloc = None; let mut last_offset = None; - let mut start_of_cold_code = None; + let mut inst_offsets = vec![]; + let mut state = I::State::new(&*self.abi); + + let mut disasm = String::new(); + + if !self.debug_value_labels.is_empty() { + inst_offsets.resize(self.insts.len(), 0); + } + for block in final_order { + log::trace!("emitting block {:?}", block); let new_offset = I::align_basic_block(buffer.cur_offset()); while new_offset > buffer.cur_offset() { // Pad with NOPs up to the aligned block offset. let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); - nop.emit(&mut buffer, &self.emit_info, &mut Default::default()); + nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default()); } assert_eq!(buffer.cur_offset(), new_offset); - if Some(block) == first_cold_block { - start_of_cold_code = Some(buffer.cur_offset()); + let do_emit = |inst: &I, + allocs: &[Allocation], + disasm: &mut String, + buffer: &mut MachBuffer, + state: &mut I::State| { + if want_disasm { + let mut s = state.clone(); + writeln!(disasm, " {}", inst.pretty_print_inst(allocs, &mut s)).unwrap(); + } + inst.emit(allocs, buffer, &self.emit_info, state); + }; + + // Is this the first block? Emit the prologue directly if so. + if block == self.entry { + log::trace!(" -> entry block"); + buffer.start_srcloc(SourceLoc::default()); + state.pre_sourceloc(SourceLoc::default()); + for inst in &prologue_insts { + do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); + } + buffer.end_srcloc(); } - let (start, end) = self.block_ranges[block as usize]; - buffer.bind_label(MachLabel::from_block(block)); - label_inst_indices[block as usize] = start; + // Now emit the regular block body. - if cfg_metadata { + buffer.bind_label(MachLabel::from_block(block)); + + if want_disasm { + writeln!(&mut disasm, "block{}:", block.index()).unwrap(); + } + + if want_metadata { // Track BB starts. If we have backed up due to MachBuffer // branch opts, note that the removed blocks were removed. 
let cur_offset = buffer.cur_offset(); @@ -543,38 +868,139 @@ impl VCode { last_offset = Some(cur_offset); } - for iix in start..end { - let srcloc = self.srclocs[iix as usize]; - if cur_srcloc != Some(srcloc) { - if cur_srcloc.is_some() { - buffer.end_srcloc(); - } - buffer.start_srcloc(srcloc); - cur_srcloc = Some(srcloc); - } - state.pre_sourceloc(cur_srcloc.unwrap_or(SourceLoc::default())); + for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { + match inst_or_edit { + InstOrEdit::Inst(iix) => { + if !self.debug_value_labels.is_empty() { + // If we need to produce debug info, + // record the offset of each instruction + // so that we can translate value-label + // ranges to machine-code offsets. - if let Some(safepoint_idx) = safepoint_indices.get(&iix) { - let stack_map = self - .abi - .spillslots_to_stack_map(&self.safepoint_slots[*safepoint_idx][..], &state); - state.pre_safepoint(stack_map); - } + // Cold blocks violate monotonicity + // assumptions elsewhere (that + // instructions in inst-index order are in + // order in machine code), so we omit + // their offsets here. Value-label range + // generation below will skip empty ranges + // and ranges with to-offsets of zero. + if !self.block_order.is_cold(block) { + inst_offsets[iix.index()] = buffer.cur_offset(); + } + } - self.insts[iix as usize].emit(&mut buffer, &self.emit_info, &mut state); + if self.insts[iix.index()].is_move().is_some() { + // Skip moves in the pre-regalloc program; + // all of these are incorporated by the + // regalloc into its unified move handling + // and they come out the other end, if + // still needed (not elided), as + // regalloc-inserted moves. + continue; + } - if self.generate_debug_info { - // Buffer truncation may have happened since last inst append; trim inst-end - // layout info as appropriate. - let l = &mut inst_end_offsets[0..iix as usize]; - for end in l.iter_mut().rev() { - if *end > buffer.cur_offset() { - *end = buffer.cur_offset(); + // Update the srcloc at this point in the buffer. + let srcloc = self.srclocs[iix.index()]; + if cur_srcloc != Some(srcloc) { + if cur_srcloc.is_some() { + buffer.end_srcloc(); + } + buffer.start_srcloc(srcloc); + cur_srcloc = Some(srcloc); + } + state.pre_sourceloc(cur_srcloc.unwrap_or(SourceLoc::default())); + + // If this is a safepoint, compute a stack map + // and pass it to the emit state. + if self.insts[iix.index()].is_safepoint() { + let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; + // Find the contiguous range of + // (progpoint, allocation) safepoint slot + // records in `regalloc.safepoint_slots` + // for this instruction index. + let safepoint_slots_start = regalloc + .safepoint_slots + .binary_search_by(|(progpoint, _alloc)| { + if progpoint.inst() >= iix { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Less + } + }) + .unwrap_err(); + + for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] + .iter() + .take_while(|(progpoint, _)| progpoint.inst() == iix) + { + let slot = alloc.as_stack().unwrap(); + safepoint_slots.push(slot); + } + if !safepoint_slots.is_empty() { + let stack_map = self + .abi + .spillslots_to_stack_map(&safepoint_slots[..], &state); + state.pre_safepoint(stack_map); + } + } + + // Get the allocations for this inst from the regalloc result. + let allocs = regalloc.inst_allocs(iix); + + // If the instruction we are about to emit is + // a return, place an epilogue at this point + // (and don't emit the return; the actual + // epilogue will contain it). 
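(An editorial aside on the `binary_search_by` used for safepoint slots above, not part of the patch; the return/epilogue handling continues below. The comparator never returns `Equal`, so the search always "fails", and the `Err` payload is the partition point: the first index whose key is `>=` the probe, i.e. the start of the sorted, contiguous run being sought. A standalone sketch of the idiom:)

fn first_index_geq(keys: &[u32], probe: u32) -> usize {
    keys.binary_search_by(|&k| {
        if k >= probe {
            std::cmp::Ordering::Greater
        } else {
            std::cmp::Ordering::Less
        }
    })
    .unwrap_err()
}
// first_index_geq(&[1, 3, 3, 5], 3) == 1; all entries with key 3 then
// follow contiguously, mirroring the per-instruction safepoint-slot scan.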
+ if self.insts[iix.index()].is_term() == MachTerminator::Ret { + for inst in self.abi.gen_epilogue() { + do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); + } } else { - break; + // Emit the instruction! + do_emit( + &self.insts[iix.index()], + allocs, + &mut disasm, + &mut buffer, + &mut state, + ); + } + } + + InstOrEdit::Edit(Edit::Move { from, to }) => { + // Create a move/spill/reload instruction and + // immediately emit it. + match (from.as_reg(), to.as_reg()) { + (Some(from), Some(to)) => { + // Reg-to-reg move. + let from_rreg = Reg::from(from); + let to_rreg = Writable::from_reg(Reg::from(to)); + debug_assert_eq!(from.class(), to.class()); + let ty = I::canonical_type_for_rc(from.class()); + let mv = I::gen_move(to_rreg, from_rreg, ty); + do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state); + } + (Some(from), None) => { + // Spill from register to spillslot. + let to = to.as_stack().unwrap(); + let from_rreg = RealReg::from(from); + debug_assert_eq!(from.class(), to.class()); + let spill = self.abi.gen_spill(to, from_rreg); + do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); + } + (None, Some(to)) => { + // Load from spillslot to register. + let from = from.as_stack().unwrap(); + let to_rreg = Writable::from_reg(RealReg::from(to)); + debug_assert_eq!(from.class(), to.class()); + let reload = self.abi.gen_reload(to_rreg, from); + do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); + } + (None, None) => { + panic!("regalloc2 should have eliminated stack-to-stack moves!"); + } } } - inst_end_offsets[iix as usize] = buffer.cur_offset(); } } @@ -583,13 +1009,14 @@ impl VCode { cur_srcloc = None; } - // Do we need an island? Get the worst-case size of the next BB and see if, having - // emitted that many bytes, we will be beyond the deadline. - if block < (self.num_blocks() - 1) as BlockIndex { - let next_block = block + 1; - let next_block_range = self.block_ranges[next_block as usize]; - let next_block_size = next_block_range.1 - next_block_range.0; - let worst_case_next_bb = I::worst_case_size() * next_block_size; + // Do we need an island? Get the worst-case size of the + // next BB and see if, having emitted that many bytes, we + // will be beyond the deadline. + if block.index() < (self.num_blocks() - 1) { + let next_block = block.index() + 1; + let next_block_range = self.block_ranges[next_block]; + let next_block_size = next_block_range.1.index() - next_block_range.0.index(); + let worst_case_next_bb = I::worst_case_size() * next_block_size as u32; if buffer.island_needed(worst_case_next_bb) { buffer.emit_island(worst_case_next_bb); } @@ -602,25 +1029,12 @@ impl VCode { buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value()); } - if self.generate_debug_info { - for end in inst_end_offsets.iter_mut().rev() { - if *end > buffer.cur_offset() { - *end = buffer.cur_offset(); - } else { - break; - } - } - *self.insts_layout.borrow_mut() = InstsLayoutInfo { - inst_end_offsets, - label_inst_indices, - start_of_cold_code, - }; - } + let func_body_len = buffer.cur_offset(); // Create `bb_edges` and final (filtered) `bb_starts`. - let mut final_bb_starts = vec![]; let mut bb_edges = vec![]; - if cfg_metadata { + let mut bb_offsets = vec![]; + if want_metadata { for block in 0..self.num_blocks() { if bb_starts[block].is_none() { // Block was deleted by MachBuffer; skip. 
@@ -628,157 +1042,248 @@ impl VCode { } let from = bb_starts[block].unwrap(); - final_bb_starts.push(from); + bb_offsets.push(from); // Resolve each `succ` label and add edges. - let succs = self.block_succs(BlockIx::new(block as u32)); - for succ in succs.iter() { - let to = buffer.resolve_label_offset(MachLabel::from_block(succ.get())); + let succs = self.block_succs(BlockIndex::new(block)); + for &succ in succs.iter() { + let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); bb_edges.push((from, to)); } } } - (buffer, final_bb_starts, bb_edges) + let value_labels_ranges = + self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); + let frame_size = self.abi.frame_size(); + + EmitResult { + buffer, + bb_offsets, + bb_edges, + inst_offsets, + func_body_len, + disasm: if want_disasm { Some(disasm) } else { None }, + stackslot_offsets: self.abi.stackslot_offsets().clone(), + value_labels_ranges, + frame_size, + } } - /// Generates value-label ranges. - pub fn value_labels_ranges(&self) -> ValueLabelsRanges { - if !self.has_value_labels { + fn compute_value_labels_ranges( + &self, + regalloc: ®alloc2::Output, + inst_offsets: &[CodeOffset], + func_body_len: u32, + ) -> ValueLabelsRanges { + if self.debug_value_labels.is_empty() { return ValueLabelsRanges::default(); } - let layout_info = &self.insts_layout.borrow(); - debug::compute(&self.insts, &*layout_info) - } + let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); + for &(label, from, to, alloc) in ®alloc.debug_locations { + let ranges = value_labels_ranges + .entry(ValueLabel::from_u32(label)) + .or_insert_with(|| vec![]); + let from_offset = inst_offsets[from.inst().index()]; + let to_offset = if to.inst().index() == inst_offsets.len() { + func_body_len + } else { + inst_offsets[to.inst().index()] + }; - /// Get the offsets of stackslots. - pub fn stackslot_offsets(&self) -> &PrimaryMap { - self.abi.stackslot_offsets() + // Empty range or to-offset of zero can happen because of + // cold blocks (see above). + if to_offset == 0 || from_offset == to_offset { + continue; + } + + let loc = if let Some(preg) = alloc.as_reg() { + LabelValueLoc::Reg(Reg::from(preg)) + } else { + // We can't translate spillslot locations at the + // moment because ValueLabelLoc requires an + // instantaneous SP offset, and this can *change* + // within the range we have here because of callsites + // adjusting SP temporarily. To avoid the complexity + // of accurately plumbing through nominal-SP + // adjustment sites, we just omit debug info for + // values that are spilled. Not ideal, but debug info + // is best-effort. + continue; + }; + + ranges.push(ValueLocRange { + loc, + // ValueLocRanges are recorded by *instruction-end + // offset*. `from_offset` is the *start* of the + // instruction; that is the same as the end of another + // instruction, so we only want to begin coverage once + // we are past the previous instruction's end. + start: from_offset + 1, + // Likewise, `end` is exclusive, but we want to + // *include* the end of the last + // instruction. `to_offset` is the start of the + // `to`-instruction, which is the exclusive end, i.e., + // the first instruction not covered. That + // instruction's start is the same as the end of the + // last instruction that is included, so we go one + // byte further to be sure to include it. + end: to_offset + 1, + }); + } + + value_labels_ranges } /// Get the IR block for a BlockIndex, if one exists. 
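(A worked example for the `+ 1` endpoint convention above, not part of the patch; the offsets are invented. If the covered range begins at the instruction starting at offset 0x10 and ends before the instruction starting at 0x14, the recorded range is `(0x11, 0x15)`: coverage begins just past the previous instruction's end offset, 0x10, and runs one byte past 0x14 so that the last covered instruction's own end offset is included.)

fn loc_range_endpoints(from_offset: u32, to_offset: u32) -> (u32, u32) {
    // Ranges are recorded by instruction-end offset; see the
    // `start`/`end` comments above.
    (from_offset + 1, to_offset + 1)
}
// loc_range_endpoints(0x10, 0x14) == (0x11, 0x15)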
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option { - self.block_order.lowered_order()[block as usize].orig_block() + self.block_order.lowered_order()[block.index()].orig_block() } } impl RegallocFunction for VCode { - type Inst = I; - - fn insns(&self) -> &[I] { - &self.insts[..] + fn num_insts(&self) -> usize { + self.insts.len() } - fn insns_mut(&mut self) -> &mut [I] { - &mut self.insts[..] + fn num_blocks(&self) -> usize { + self.block_ranges.len() } - fn get_insn(&self, insn: InstIx) -> &I { - &self.insts[insn.get() as usize] + fn entry_block(&self) -> BlockIndex { + self.entry } - fn get_insn_mut(&mut self, insn: InstIx) -> &mut I { - &mut self.insts[insn.get() as usize] + fn block_insns(&self, block: BlockIndex) -> InstRange { + let (start, end) = self.block_ranges[block.index()]; + InstRange::forward(start, end) } - fn blocks(&self) -> Range { - Range::new(BlockIx::new(0), self.block_ranges.len()) + fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { + let (start, end) = self.block_succ_range[block.index()]; + &self.block_succs_preds[start as usize..end as usize] } - fn entry_block(&self) -> BlockIx { - BlockIx::new(self.entry) + fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { + let (start, end) = self.block_pred_range[block.index()]; + &self.block_succs_preds[start as usize..end as usize] } - fn block_insns(&self, block: BlockIx) -> Range { - let (start, end) = self.block_ranges[block.get() as usize]; - Range::new(InstIx::new(start), (end - start) as usize) + fn block_params(&self, block: BlockIndex) -> &[VReg] { + let (start, end) = self.block_params_range[block.index()]; + &self.block_params[start as usize..end as usize] } - fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> { - let (start, end) = self.block_succ_range[block.get() as usize]; - Cow::Borrowed(&self.block_succs[start..end]) + fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { + let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()]; + let succ_ranges = + &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize]; + let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx]; + &self.branch_block_args[branch_block_args_start as usize..branch_block_args_end as usize] } - fn is_ret(&self, insn: InstIx) -> bool { - match self.insts[insn.get() as usize].is_term() { + fn is_ret(&self, insn: InsnIndex) -> bool { + match self.insts[insn.index()].is_term() { MachTerminator::Ret => true, _ => false, } } - fn is_included_in_clobbers(&self, insn: &I) -> bool { - insn.is_included_in_clobbers() + fn is_branch(&self, insn: InsnIndex) -> bool { + match self.insts[insn.index()].is_term() { + MachTerminator::Cond(..) + | MachTerminator::Uncond(..) + | MachTerminator::Indirect(..) 
=> true,
+            _ => false,
+        }
+    }

-    fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
-        insn.get_regs(collector)
+    fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
+        self.insts[insn.index()].is_safepoint()
     }

-    fn map_regs<RUM: RegUsageMapper>(insn: &mut I, mapper: &RUM) {
-        insn.map_regs(mapper);
+    fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> {
+        self.is_move.get(&insn).cloned()
     }

-    fn is_move(&self, insn: &I) -> Option<(Writable<Reg>, Reg)> {
-        insn.is_move()
+    fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
+        let (start, end) = self.operand_ranges[insn.index()];
+        &self.operands[start as usize..end as usize]
     }

-    fn get_num_vregs(&self) -> usize {
-        self.vreg_types.len()
+    fn inst_clobbers(&self, insn: InsnIndex) -> &[PReg] {
+        if let Some(&(start, end)) = self.clobber_ranges.get(&insn) {
+            &self.clobbers[start as usize..end as usize]
+        } else {
+            &[]
+        }
     }

-    fn get_spillslot_size(&self, regclass: RegClass, _: VirtualReg) -> u32 {
-        self.abi.get_spillslot_size(regclass)
+    fn num_vregs(&self) -> usize {
+        std::cmp::max(self.vreg_types.len(), first_user_vreg_index())
     }

-    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, _: Option<VirtualReg>) -> I {
-        self.abi.gen_spill(to_slot, from_reg)
+    fn reftype_vregs(&self) -> &[VReg] {
+        &self.reftyped_vregs[..]
     }

-    fn gen_reload(
-        &self,
-        to_reg: Writable<RealReg>,
-        from_slot: SpillSlot,
-        _: Option<VirtualReg>,
-    ) -> I {
-        self.abi.gen_reload(to_reg, from_slot)
+    fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
+        &self.debug_value_labels[..]
     }

-    fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
-        let ty = self.vreg_type(vreg);
-        I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty)
+    fn is_pinned_vreg(&self, vreg: VReg) -> Option<PReg> {
+        pinned_vreg_to_preg(vreg)
     }

-    fn gen_zero_len_nop(&self) -> I {
-        I::gen_nop(0)
+    fn spillslot_size(&self, regclass: RegClass) -> usize {
+        self.abi.get_spillslot_size(regclass) as usize
     }

-    fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option<I> {
-        insn.maybe_direct_reload(reg, slot)
-    }
-
-    fn func_liveins(&self) -> RegallocSet<RealReg> {
-        self.liveins.clone()
-    }
-
-    fn func_liveouts(&self) -> RegallocSet<RealReg> {
-        self.liveouts.clone()
+    fn allow_multiple_vreg_defs(&self) -> bool {
+        // At least the s390x backend requires this, because the
+        // `Loop` pseudo-instruction aggregates all Operands so pinned
+        // vregs (RealRegs) may occur more than once.
+        true
     }
 }

 impl<I: VCodeInst> fmt::Debug for VCode<I> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        writeln!(f, "VCode_Debug {{")?;
-        writeln!(f, "  Entry block: {}", self.entry)?;
+        writeln!(f, "VCode {{")?;
+        writeln!(f, "  Entry block: {}", self.entry.index())?;
+
+        let mut state = Default::default();
+
+        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
+        alias_keys.sort_unstable();
+        for key in alias_keys {
+            let dest = self.vreg_aliases.get(&key).unwrap();
+            writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
+        }

         for block in 0..self.num_blocks() {
-            writeln!(f, "Block {}:", block,)?;
-            for succ in self.succs(block as BlockIndex) {
-                writeln!(f, "  (successor: Block {})", succ.get())?;
+            let block = BlockIndex::new(block);
+            writeln!(f, "Block {}:", block.index())?;
+            if let Some(bb) = self.bindex_to_bb(block) {
+                writeln!(f, "    (original IR block: {})", bb)?;
             }
-            let (start, end) = self.block_ranges[block];
-            writeln!(f, "  (instruction range: {} ..
{})", start, end)?; - for inst in start..end { - writeln!(f, " Inst {}: {:?}", inst, self.insts[inst as usize])?; + for succ in self.succs(block) { + writeln!(f, " (successor: Block {})", succ.index())?; + } + let (start, end) = self.block_ranges[block.index()]; + writeln!( + f, + " (instruction range: {} .. {})", + start.index(), + end.index() + )?; + for inst in start.index()..end.index() { + writeln!( + f, + " Inst {}: {}", + inst, + self.insts[inst].pretty_print_inst(&[], &mut state) + )?; } } @@ -787,57 +1292,6 @@ impl fmt::Debug for VCode { } } -/// Pretty-printing with `RealRegUniverse` context. -impl PrettyPrint for VCode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - use std::fmt::Write; - - let mut s = String::new(); - write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap(); - write!(&mut s, " Entry block: {}\n", self.entry).unwrap(); - - let mut state = Default::default(); - let mut safepoint_idx = 0; - for i in 0..self.num_blocks() { - let block = i as BlockIndex; - - write!(&mut s, "Block {}:\n", block).unwrap(); - if let Some(bb) = self.bindex_to_bb(block) { - write!(&mut s, " (original IR block: {})\n", bb).unwrap(); - } - for succ in self.succs(block) { - write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap(); - } - let (start, end) = self.block_ranges[block as usize]; - write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap(); - for inst in start..end { - if safepoint_idx < self.safepoint_insns.len() - && self.safepoint_insns[safepoint_idx] == inst - { - write!( - &mut s, - " (safepoint: slots {:?} with EmitState {:?})\n", - self.safepoint_slots[safepoint_idx], state, - ) - .unwrap(); - safepoint_idx += 1; - } - write!( - &mut s, - " Inst {}: {}\n", - inst, - self.insts[inst as usize].pretty_print(mb_rru, &mut state) - ) - .unwrap(); - } - } - - write!(&mut s, "}}}}\n").unwrap(); - - s - } -} - /// This structure tracks the large constants used in VCode that will be emitted separately by the /// [MachBuffer]. /// diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 044e24930f..9922130762 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -397,9 +397,6 @@ (decl emit (MInst) Unit) (extern constructor emit emit) -(decl emit_safepoint (MInst) Unit) -(extern constructor emit_safepoint emit_safepoint) - ;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;; (type SideEffectNoResult (enum (Inst (inst MInst)))) @@ -411,12 +408,6 @@ (let ((_ Unit (emit inst))) (output_none))) -;; Similarly, but emit the side-effectful instruction as a safepoint. -(decl safepoint (SideEffectNoResult) InstOutput) -(rule (safepoint (SideEffectNoResult.Inst inst)) - (let ((_ Unit (emit_safepoint inst))) - (output_none))) - ;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Newtype wrapper around `MInst` for instructions that are used for their diff --git a/cranelift/codegen/src/result.rs b/cranelift/codegen/src/result.rs index 3178cd5ba9..d8b7320c54 100644 --- a/cranelift/codegen/src/result.rs +++ b/cranelift/codegen/src/result.rs @@ -1,12 +1,14 @@ //! Result and error types representing the outcome of compiling a function. +use regalloc2::checker::CheckerErrors; + use crate::verifier::VerifierErrors; use std::string::String; /// A compilation error. /// /// When Cranelift fails to compile a function, it will return one of these error codes. 
-#[derive(Debug, PartialEq, Eq)] +#[derive(Debug)] pub enum CodegenError { /// A list of IR verifier errors. /// @@ -36,6 +38,9 @@ pub enum CodegenError { /// A failure to map Cranelift register representation to a DWARF register representation. #[cfg(feature = "unwind")] RegisterMappingError(crate::isa::unwind::systemv::RegisterMappingError), + + /// Register allocator internal error discovered by the symbolic checker. + Regalloc(CheckerErrors), } /// A convenient alias for a `Result` that uses `CodegenError` as the error type. @@ -52,6 +57,7 @@ impl std::error::Error for CodegenError { | CodegenError::Unsupported { .. } => None, #[cfg(feature = "unwind")] CodegenError::RegisterMappingError { .. } => None, + CodegenError::Regalloc(..) => None, } } } @@ -65,6 +71,7 @@ impl std::fmt::Display for CodegenError { CodegenError::Unsupported(feature) => write!(f, "Unsupported feature: {}", feature), #[cfg(feature = "unwind")] CodegenError::RegisterMappingError(_0) => write!(f, "Register mapping error"), + CodegenError::Regalloc(errors) => write!(f, "Regalloc validation errors: {:?}", errors), } } } diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index 1842e543fe..5a9f27c746 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -510,7 +510,6 @@ mod tests { assert_eq!( f.to_string(), r#"[shared] -regalloc = "backtracking" opt_level = "none" tls_model = "none" libcall_call_conv = "isa_default" diff --git a/cranelift/codegen/src/value_label.rs b/cranelift/codegen/src/value_label.rs index 459fa62d72..ab5276909a 100644 --- a/cranelift/codegen/src/value_label.rs +++ b/cranelift/codegen/src/value_label.rs @@ -1,10 +1,10 @@ use crate::ir::{SourceLoc, ValueLabel}; +use crate::machinst::Reg; use crate::HashMap; use alloc::vec::Vec; use core::cmp::Ordering; use core::convert::From; use core::ops::Deref; -use regalloc::Reg; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif index bc6636429a..c3254cc946 100644 --- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif @@ -10,14 +10,9 @@ block0(v0: i64, v1: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldr w0, [x0, w1, SXTW] -; Inst 1: ret -; }} +; block0: +; ldr w0, [x0, w1, SXTW] +; ret function %f6(i64, i32) -> i32 { block0(v0: i64, v1: i32): @@ -27,14 +22,9 @@ block0(v0: i64, v1: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldr w0, [x0, w1, SXTW] -; Inst 1: ret -; }} +; block0: +; ldr w0, [x0, w1, SXTW] +; ret function %f7(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -45,15 +35,10 @@ block0(v0: i32, v1: i32): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov w0, w0 -; Inst 1: ldr w0, [x0, w1, UXTW] -; Inst 2: ret -; }} +; block0: +; mov w6, w0 +; ldr w0, [x6, w1, UXTW] +; ret function %f8(i64, i32) -> i32 { block0(v0: i64, v1: i32): @@ -66,17 +51,12 @@ block0(v0: i64, v1: i32): return v7 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: add x2, x0, #68 -; Inst 1: add x0, x2, x0 -; Inst 2: add x0, x0, x1, SXTW -; Inst 3: ldr w0, [x0, w1, SXTW] -; Inst 4: ret -; }} +; block0: +; add x6, x0, #68 +; add x6, x6, x0 +; add x6, x6, x1, SXTW +; ldr w0, [x6, w1, SXTW] +; ret function %f9(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -88,16 +68,11 @@ block0(v0: i64, v1: i64, v2: i64): return v7 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: add x0, x0, x2 -; Inst 1: add x0, x0, x1 -; Inst 2: ldur w0, [x0, #48] -; Inst 3: ret -; }} +; block0: +; add x0, x0, x2 +; add x0, x0, x1 +; ldr w0, [x0, #48] +; ret function %f10(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -109,17 +84,12 @@ block0(v0: i64, v1: i64, v2: i64): return v7 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: movz x3, #4100 -; Inst 1: add x1, x3, x1 -; Inst 2: add x1, x1, x2 -; Inst 3: ldr w0, [x1, x0] -; Inst 4: ret -; }} +; block0: +; movz x8, #4100 +; add x8, x8, x1 +; add x8, x8, x2 +; ldr w0, [x8, x0] +; ret function %f10() -> i32 { block0: @@ -128,15 +98,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #1234 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; movz x2, #1234 +; ldr w0, [x2] +; ret function %f11(i64) -> i32 { block0(v0: i64): @@ -146,15 +111,10 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: add x0, x0, #8388608 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; add x4, x0, #8388608 +; ldr w0, [x4] +; ret function %f12(i64) -> i32 { block0(v0: i64): @@ -164,15 +124,10 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sub x0, x0, #4 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; sub x4, x0, #4 +; ldr w0, [x4] +; ret function %f13(i64) -> i32 { block0(v0: i64): @@ -182,17 +137,12 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: movz w1, #51712 -; Inst 1: movk w1, #15258, LSL #16 -; Inst 2: add x0, x1, x0 -; Inst 3: ldr w0, [x0] -; Inst 4: ret -; }} +; block0: +; movz w4, #51712 +; movk w4, #15258, LSL #16 +; add x4, x4, x0 +; ldr w0, [x4] +; ret function %f14(i32) -> i32 { block0(v0: i32): @@ -201,15 +151,10 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtw x0, w0 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; sxtw x4, w0 +; ldr w0, [x4] +; ret function %f15(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -220,15 +165,10 @@ block0(v0: i32, v1: i32): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtw x0, w0 -; Inst 1: ldr w0, [x0, w1, SXTW] -; Inst 2: ret -; }} +; block0: +; sxtw x6, w0 +; ldr w0, [x6, w1, SXTW] +; ret function %f18(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -238,15 +178,10 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: movn w0, #4097 -; Inst 1: ldrsh x0, [x0] -; Inst 2: ret -; }} +; block0: +; movn w8, #4097 +; ldrsh x0, [x8] +; ret function %f19(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -256,15 +191,10 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #4098 -; Inst 1: ldrsh x0, [x0] -; Inst 2: ret -; }} +; block0: +; movz x8, #4098 +; ldrsh x0, [x8] +; ret function %f20(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -274,16 +204,11 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movn w0, #4097 -; Inst 1: sxtw x0, w0 -; Inst 2: ldrsh x0, [x0] -; Inst 3: ret -; }} +; block0: +; movn w8, #4097 +; sxtw x10, w8 +; ldrsh x0, [x10] +; ret function %f21(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -293,16 +218,11 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movz x0, #4098 -; Inst 1: sxtw x0, w0 -; Inst 2: ldrsh x0, [x0] -; Inst 3: ret -; }} +; block0: +; movz x8, #4098 +; sxtw x10, w8 +; ldrsh x0, [x10] +; ret function %i128(i64) -> i128 { block0(v0: i64): @@ -311,17 +231,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1] -; Inst 2: stp x2, x1, [x0] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8] +; mov x11, x3 +; stp x11, x1, [x0] +; mov x0, x3 +; ret function %i128_imm_offset(i64) -> i128 { block0(v0: i64): @@ -330,17 +246,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #16] -; Inst 2: stp x2, x1, [x0, #16] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #16] +; mov x11, x3 +; stp x11, x1, [x0, #16] +; mov x0, x3 +; ret function %i128_imm_offset_large(i64) -> i128 { block0(v0: i64): @@ -349,17 +261,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #504] -; Inst 2: stp x2, x1, [x0, #504] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #504] +; mov x11, x3 +; stp x11, x1, [x0, #504] +; mov x0, x3 +; ret function %i128_imm_offset_negative_large(i64) -> i128 { block0(v0: i64): @@ -368,17 +276,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #-512] -; Inst 2: stp x2, x1, [x0, #-512] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #-512] +; mov x11, x3 +; stp x11, x1, [x0, #-512] +; mov x0, x3 +; ret function %i128_add_offset(i64) -> i128 { block0(v0: i64): @@ -388,17 +292,13 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #32] -; Inst 2: stp x2, x1, [x0, #32] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #32] +; mov x11, x3 +; stp x11, x1, [x0, #32] +; mov x0, x3 +; ret function %i128_32bit_sextend_simple(i32) -> i128 { block0(v0: i32): @@ -408,18 +308,13 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: sxtw x1, w0 -; Inst 1: ldp x2, x1, [x1] -; Inst 2: sxtw x0, w0 -; Inst 3: stp x2, x1, [x0] -; Inst 4: mov x0, x2 -; Inst 5: ret -; }} +; block0: +; sxtw x8, w0 +; ldp x4, x1, [x8] +; sxtw x9, w0 +; mov x0, x4 +; stp x0, x1, [x9] +; ret function %i128_32bit_sextend(i64, i32) -> i128 { block0(v0: i64, v1: i32): @@ -431,18 +326,14 @@ block0(v0: i64, v1: i32): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: mov x2, x0 -; Inst 1: add x2, x2, x1, SXTW -; Inst 2: ldp x3, x2, [x2, #24] -; Inst 3: add x0, x0, x1, SXTW -; Inst 4: stp x3, x2, [x0, #24] -; Inst 5: mov x0, x3 -; Inst 6: mov x1, x2 -; Inst 7: ret -; }} +; block0: +; mov x10, x0 +; add x10, x10, x1, SXTW +; ldp x6, x7, [x10, #24] +; add x0, x0, x1, SXTW +; mov x15, x6 +; mov x1, x7 +; stp x15, x1, [x0, #24] +; mov x0, x6 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif index 939f0e6390..1491b28604 100644 --- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif @@ -8,14 +8,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; add x0, x0, x1 +; ret function %f2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -23,14 +18,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sub x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; sub x0, x0, x1 +; ret function %f3(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -38,14 +28,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: madd x0, x0, x1, xzr -; Inst 1: ret -; }} +; block0: +; madd x0, x0, x1, xzr +; ret function %f4(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -53,14 +38,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umulh x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; umulh x0, x0, x1 +; ret function %f5(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -68,14 +48,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smulh x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; smulh x0, x0, x1 +; ret function %f6(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -83,18 +58,13 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: adds xzr, x1, #1 -; Inst 2: ccmp x0, #1, #nzcv, eq -; Inst 3: b.vc 8 ; udf -; Inst 4: sdiv x0, x0, x1 -; Inst 5: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; adds xzr, x1, #1 +; ccmp x0, #1, #nzcv, eq +; b.vc 8 ; udf +; sdiv x0, x0, x1 +; ret function %f7(i64) -> i64 { block0(v0: i64): @@ -103,15 +73,10 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orr x1, xzr, #2 -; Inst 1: sdiv x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; orr x3, xzr, #2 +; sdiv x0, x0, x3 +; ret function %f8(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -119,15 +84,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: udiv x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; udiv x0, x0, x1 +; ret function %f9(i64) -> i64 { block0(v0: i64): @@ -136,15 +96,10 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orr x1, xzr, #2 -; Inst 1: udiv x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; orr x3, xzr, #2 +; udiv x0, x0, x3 +; ret function %f10(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -152,16 +107,11 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: sdiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; sdiv x6, x0, x1 +; msub x0, x6, x1, x0 +; ret function %f11(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -169,16 +119,11 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: udiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; udiv x6, x0, x1 +; msub x0, x6, x1, x0 +; ret function %f12(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -186,20 +131,15 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: sxtw x0, w0 -; Inst 1: sxtw x1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: adds wzr, w1, #1 -; Inst 4: ccmp w0, #1, #nzcv, eq -; Inst 5: b.vc 8 ; udf -; Inst 6: sdiv x0, x0, x1 -; Inst 7: ret -; }} +; block0: +; sxtw x5, w0 +; sxtw x7, w1 +; cbnz x7, 8 ; udf +; adds wzr, w7, #1 +; ccmp w5, #1, #nzcv, eq +; b.vc 8 ; udf +; sdiv x0, x5, x7 +; ret function %f13(i32) -> i32 { block0(v0: i32): @@ -208,16 +148,11 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtw x0, w0 -; Inst 1: orr x1, xzr, #2 -; Inst 2: sdiv x0, x0, x1 -; Inst 3: ret -; }} +; block0: +; sxtw x3, w0 +; orr x5, xzr, #2 +; sdiv x0, x3, x5 +; ret function %f14(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -225,17 +160,12 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: mov w0, w0 -; Inst 1: mov w1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: udiv x0, x0, x1 -; Inst 4: ret -; }} +; block0: +; mov w5, w0 +; mov w7, w1 +; cbnz x7, 8 ; udf +; udiv x0, x5, x7 +; ret function %f15(i32) -> i32 { block0(v0: i32): @@ -244,16 +174,11 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: mov w0, w0 -; Inst 1: orr x1, xzr, #2 -; Inst 2: udiv x0, x0, x1 -; Inst 3: ret -; }} +; block0: +; mov w3, w0 +; orr x5, xzr, #2 +; udiv x0, x3, x5 +; ret function %f16(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -261,18 +186,13 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: sxtw x0, w0 -; Inst 1: sxtw x1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: sdiv x2, x0, x1 -; Inst 4: msub x0, x2, x1, x0 -; Inst 5: ret -; }} +; block0: +; sxtw x5, w0 +; sxtw x7, w1 +; cbnz x7, 8 ; udf +; sdiv x10, x5, x7 +; msub x0, x10, x7, x5 +; ret function %f17(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -280,18 +200,13 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: mov w0, w0 -; Inst 1: mov w1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: udiv x2, x0, x1 -; Inst 4: msub x0, x2, x1, x0 -; Inst 5: ret -; }} +; block0: +; mov w5, w0 +; mov w7, w1 +; cbnz x7, 8 ; udf +; udiv x10, x5, x7 +; msub x0, x10, x7, x5 +; ret function %f18(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -299,14 +214,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1 +; ret function %f19(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -314,14 +224,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1 +; ret function %f20(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -329,14 +234,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1 +; ret function %f21(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -344,14 +244,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, x1 +; ret function %f22(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -359,14 +254,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, x1 +; ret function %f23(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -374,14 +264,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: eon x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, x1 +; ret function %f24(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -389,14 +274,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, xzr, x0 -; Inst 1: ret -; }} +; block0: +; orn x0, xzr, x0 +; ret function %f25(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -406,14 +286,9 @@ block0(v0: i32, v1: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sub w0, w1, w0, LSL 21 -; Inst 1: ret -; }} +; block0: +; sub w0, w1, w0, LSL 21 +; ret function %f26(i32) -> i32 { block0(v0: i32): @@ -422,14 +297,9 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sub w0, w0, #1 -; Inst 1: ret -; }} +; block0: +; sub w0, w0, #1 +; ret function %f27(i32) -> i32 { block0(v0: i32): @@ -438,14 +308,9 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, #1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, #1 +; ret function %f28(i64) -> i64 { block0(v0: i64): @@ -454,14 +319,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, #1 -; Inst 1: ret -; }} +; block0: +; add x0, x0, #1 +; ret function %f29(i64) -> i64 { block0(v0: i64): @@ -470,15 +330,10 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #1 -; Inst 1: sub x0, xzr, x0 -; Inst 2: ret -; }} +; block0: +; movz x3, #1 +; sub x0, xzr, x3 +; ret function %f30(i8x16) -> i8x16 { block0(v0: i8x16): @@ -487,17 +342,12 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: movz x0, #1 -; Inst 1: sub w0, wzr, w0 -; Inst 2: dup v1.16b, w0 -; Inst 3: ushl v0.16b, v0.16b, v1.16b -; Inst 4: ret -; }} +; block0: +; movz x3, #1 +; sub w5, wzr, w3 +; dup v7.16b, w5 +; ushl v0.16b, v0.16b, v7.16b +; ret function %add_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -505,15 +355,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: adds x0, x0, x2 -; Inst 1: adc x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; adds x0, x0, x2 +; adc x1, x1, x3 +; ret function %sub_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -521,15 +366,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs x0, x0, x2 -; Inst 1: sbc x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; subs x0, x0, x2 +; sbc x1, x1, x3 +; ret function %mul_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -537,17 +377,12 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: umulh x4, x0, x2 -; Inst 1: madd x3, x0, x3, x4 -; Inst 2: madd x1, x1, x2, x3 -; Inst 3: madd x0, x0, x2, xzr -; Inst 4: ret -; }} +; block0: +; umulh x10, x0, x2 +; madd x12, x0, x3, x10 +; madd x1, x1, x2, x12 +; madd x0, x0, x2, xzr +; ret function %add_mul_1(i32, i32, i32) -> i32 { block0(v0: i32, v1: i32, v2: i32): @@ -556,14 +391,9 @@ block0(v0: i32, v1: i32, v2: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: madd w0, w1, w2, w0 -; Inst 1: ret -; }} +; block0: +; madd w0, w1, w2, w0 +; ret function %add_mul_2(i32, i32, i32) -> i32 { block0(v0: i32, v1: i32, v2: i32): @@ -572,14 +402,9 @@ block0(v0: i32, v1: i32, v2: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: madd w0, w1, w2, w0 -; Inst 1: ret -; }} +; block0: +; madd w0, w1, w2, w0 +; ret function %srem_const (i64) -> i64 { block0(v0: i64): @@ -588,16 +413,11 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: orr x1, xzr, #2 -; Inst 1: sdiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; orr x3, xzr, #2 +; sdiv x5, x0, x3 +; msub x0, x5, x3, x0 +; ret function %urem_const (i64) -> i64 { block0(v0: i64): @@ -606,16 +426,11 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: orr x1, xzr, #2 -; Inst 1: udiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; orr x3, xzr, #2 +; udiv x5, x0, x3 +; msub x0, x5, x3, x0 +; ret function %sdiv_minus_one(i64) -> i64 { block0(v0: i64): @@ -624,16 +439,11 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: movn x1, #0 -; Inst 1: adds xzr, x1, #1 -; Inst 2: ccmp x0, #1, #nzcv, eq -; Inst 3: b.vc 8 ; udf -; Inst 4: sdiv x0, x0, x1 -; Inst 5: ret -; }} +; block0: +; movn x3, #0 +; adds xzr, x3, #1 +; ccmp x0, #1, #nzcv, eq +; b.vc 8 ; udf +; sdiv x0, x0, x3 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif index ca3c26fbcc..96cbe97cd9 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif @@ -7,14 +7,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldaddal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddal x1, x4, [x0] +; ret function %atomic_rmw_add_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -22,14 +17,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldaddal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddal w1, w4, [x0] +; ret function %atomic_rmw_add_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -37,14 +27,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldaddalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddalh w1, w4, [x0] +; ret function %atomic_rmw_add_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -52,14 +37,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldaddalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddalb w1, w4, [x0] +; ret function %atomic_rmw_and_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -67,14 +47,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclral x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclral x1, x4, [x0] +; ret function %atomic_rmw_and_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -82,14 +57,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclral w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclral w1, w4, [x0] +; ret function %atomic_rmw_and_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -97,14 +67,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclralh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclralh w1, w4, [x0] +; ret function %atomic_rmw_and_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -112,14 +77,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclralb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclralb w1, w4, [x0] +; ret function %atomic_rmw_nand_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -127,25 +87,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -153,25 +109,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! 
-; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -179,25 +131,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -205,25 +153,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -231,14 +175,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsetal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetal x1, x4, [x0] +; ret function %atomic_rmw_or_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -246,14 +185,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldsetal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetal w1, w4, [x0] +; ret function %atomic_rmw_or_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -261,14 +195,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsetalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetalh w1, w4, [x0] +; ret function %atomic_rmw_or_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -276,14 +205,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsetalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetalb w1, w4, [x0] +; ret function %atomic_rmw_xor_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -291,14 +215,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoral x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoral x1, x4, [x0] +; ret function %atomic_rmw_xor_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -306,14 +225,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoral w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoral w1, w4, [x0] +; ret function %atomic_rmw_xor_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -321,14 +235,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoralh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoralh w1, w4, [x0] +; ret function %atomic_rmw_xor_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -336,14 +245,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoralb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoralb w1, w4, [x0] +; ret function %atomic_rmw_smax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -351,14 +255,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsmaxal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxal x1, x4, [x0] +; ret function %atomic_rmw_smax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -366,14 +265,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsmaxal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxal w1, w4, [x0] +; ret function %atomic_rmw_smax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -381,14 +275,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsmaxalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxalh w1, w4, [x0] +; ret function %atomic_rmw_smax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -396,14 +285,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldsmaxalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxalb w1, w4, [x0] +; ret function %atomic_rmw_umax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -411,14 +295,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxal x1, x4, [x0] +; ret function %atomic_rmw_umax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -426,14 +305,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxal w1, w4, [x0] +; ret function %atomic_rmw_umax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -441,14 +315,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxalh w1, w4, [x0] +; ret function %atomic_rmw_umax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -456,14 +325,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxalb w1, w4, [x0] +; ret function %atomic_rmw_smin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -471,14 +335,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminal x1, x4, [x0] +; ret function %atomic_rmw_smin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -486,14 +345,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminal w1, w4, [x0] +; ret function %atomic_rmw_smin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -501,14 +355,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminalh w1, w4, [x0] +; ret function %atomic_rmw_smin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -516,14 +365,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminalb w1, w4, [x0] +; ret function %atomic_rmw_umin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -531,14 +375,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lduminal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminal x1, x4, [x0] +; ret function %atomic_rmw_umin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -546,14 +385,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: lduminal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminal w1, w4, [x0] +; ret function %atomic_rmw_umin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -561,14 +395,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lduminalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminalh w1, w4, [x0] +; ret function %atomic_rmw_umin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -576,12 +405,7 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lduminalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminalb w1, w4, [x0] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif index b793cd27ae..1229c08560 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif @@ -7,25 +7,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_add_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -33,25 +29,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_add_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -59,25 +51,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! 
-; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_add_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -85,25 +73,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -111,25 +95,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -137,25 +117,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! 
+; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -163,25 +139,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -189,25 +161,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -215,25 +183,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -241,25 +205,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -267,25 +227,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -293,25 +249,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -319,25 +271,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -345,25 +293,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -371,25 +315,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -397,25 +337,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -423,25 +359,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -449,25 +381,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -475,25 +403,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -501,25 +425,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -527,25 +447,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -553,25 +469,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -579,25 +491,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -605,25 +513,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -631,25 +535,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -657,25 +557,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -683,25 +579,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -709,25 +601,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -735,25 +623,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -761,25 +645,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -787,25 +667,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -813,25 +689,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -839,25 +711,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -865,25 +733,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -891,25 +755,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -917,23 +777,19 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif index a72474a42c..9d5ff8e132 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif @@ -7,14 +7,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldar x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldar x0, [x0] +; ret function %atomic_load_i32(i64) -> i32 { block0(v0: i64): @@ -22,14 +17,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldar w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldar w0, [x0] +; ret function %atomic_load_i16(i64) -> i16 { block0(v0: i64): @@ -37,14 +27,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarh w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarh w0, [x0] +; ret function %atomic_load_i8(i64) -> i8 { block0(v0: i64): @@ -52,14 +37,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarb w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarb w0, [x0] +; ret function %atomic_load_i32_i64(i64) -> i64 { block0(v0: i64): @@ -68,14 +48,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldar w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldar w0, [x0] +; ret function %atomic_load_i16_i64(i64) -> i64 { block0(v0: i64): @@ -84,14 +59,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarh w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarh w0, [x0] +; ret function %atomic_load_i8_i64(i64) -> i64 { block0(v0: i64): @@ -100,14 +70,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarb w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarb w0, [x0] +; ret function %atomic_load_i16_i32(i64) -> i32 { block0(v0: i64): @@ -116,14 +81,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarh w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarh w0, [x0] +; ret function %atomic_load_i8_i32(i64) -> i32 { block0(v0: i64): @@ -132,12 +92,7 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldarb w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarb w0, [x0] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif index 17b982a59b..63bea58d84 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif @@ -7,14 +7,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlr x0, [x1] -; Inst 1: ret -; }} +; block0: +; stlr x0, [x1] +; ret function %atomic_store_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -22,14 +17,9 @@ block0(v0: i32, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlr w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlr w0, [x1] +; ret function %atomic_store_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -37,14 +27,9 @@ block0(v0: i16, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrh w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrh w0, [x1] +; ret function %atomic_store_i8(i8, i64) { block0(v0: i8, v1: i64): @@ -52,14 +37,9 @@ block0(v0: i8, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrb w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrb w0, [x1] +; ret function %atomic_store_i64_i32(i64, i64) { block0(v0: i64, v1: i64): @@ -68,14 +48,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlr w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlr w0, [x1] +; ret function %atomic_store_i64_i16(i64, i64) { block0(v0: i64, v1: i64): @@ -84,14 +59,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrh w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrh w0, [x1] +; ret function %atomic_store_i64_i8(i64, i64) { block0(v0: i64, v1: i64): @@ -100,14 +70,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrb w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrb w0, [x1] +; ret function %atomic_store_i32_i16(i32, i64) { block0(v0: i32, v1: i64): @@ -116,14 +81,9 @@ block0(v0: i32, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrh w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrh w0, [x1] +; ret function %atomic_store_i32_i8(i32, i64) { block0(v0: i32, v1: i64): @@ -132,12 +92,7 @@ block0(v0: i32, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: stlrb w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrb w0, [x1] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/basic1.clif b/cranelift/filetests/filetests/isa/aarch64/basic1.clif index 4a0d0ae23b..a6caf19f9c 100644 --- a/cranelift/filetests/filetests/isa/aarch64/basic1.clif +++ b/cranelift/filetests/filetests/isa/aarch64/basic1.clif @@ -8,12 +8,7 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index 69a1a189fb..ef24f89690 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -8,15 +8,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit w0, w0 -; Inst 1: lsr w0, w0, #24 -; Inst 2: ret -; }} +; block0: +; rbit w3, w0 +; lsr w0, w3, #24 +; ret function %a(i16) -> i16 { block0(v0: i16): @@ -24,15 +19,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit w0, w0 -; Inst 1: lsr w0, w0, #16 -; Inst 2: ret -; }} +; block0: +; rbit w3, w0 +; lsr w0, w3, #16 +; ret function %a(i32) -> i32 { block0(v0: i32): @@ -40,14 +30,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: rbit w0, w0 -; Inst 1: ret -; }} +; block0: +; rbit w0, w0 +; ret function %a(i64) -> i64 { block0(v0: i64): @@ -55,14 +40,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: rbit x0, x0 -; Inst 1: ret -; }} +; block0: +; rbit x0, x0 +; ret function %a(i128) -> i128 { block0(v0: i128): @@ -70,16 +50,11 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: rbit x2, x0 -; Inst 1: rbit x0, x1 -; Inst 2: mov x1, x2 -; Inst 3: ret -; }} +; block0: +; rbit x6, x0 +; rbit x0, x1 +; mov x1, x6 +; ret function %b(i8) -> i8 { block0(v0: i8): @@ -87,16 +62,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: clz w0, w0 -; Inst 2: sub w0, w0, #24 -; Inst 3: ret -; }} +; block0: +; uxtb w3, w0 +; clz w5, w3 +; sub w0, w5, #24 +; ret function %b(i16) -> i16 { block0(v0: i16): @@ -104,16 +74,11 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxth w0, w0 -; Inst 1: clz w0, w0 -; Inst 2: sub w0, w0, #16 -; Inst 3: ret -; }} +; block0: +; uxth w3, w0 +; clz w5, w3 +; sub w0, w5, #16 +; ret function %b(i32) -> i32 { block0(v0: i32): @@ -121,14 +86,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: clz w0, w0 -; Inst 1: ret -; }} +; block0: +; clz w0, w0 +; ret function %b(i64) -> i64 { block0(v0: i64): @@ -136,14 +96,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: clz x0, x0 -; Inst 1: ret -; }} +; block0: +; clz x0, x0 +; ret function %b(i128) -> i128 { block0(v0: i128): @@ -151,18 +106,13 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: clz x1, x1 -; Inst 1: clz x0, x0 -; Inst 2: lsr x2, x1, #6 -; Inst 3: madd x0, x0, x2, x1 -; Inst 4: movz x1, #0 -; Inst 5: ret -; }} +; block0: +; clz x6, x1 +; clz x8, x0 +; lsr x10, x6, #6 +; madd x0, x8, x10, x6 +; movz x1, #0 +; ret function %c(i8) -> i8 { block0(v0: i8): @@ -170,16 +120,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: cls w0, w0 -; Inst 2: sub w0, w0, #24 -; Inst 3: ret -; }} +; block0: +; uxtb w3, w0 +; cls w5, w3 +; sub w0, w5, #24 +; ret function %c(i16) -> i16 { block0(v0: i16): @@ -187,16 +132,11 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxth w0, w0 -; Inst 1: cls w0, w0 -; Inst 2: sub w0, w0, #16 -; Inst 3: ret -; }} +; block0: +; uxth w3, w0 +; cls w5, w3 +; sub w0, w5, #16 +; ret function %c(i32) -> i32 { block0(v0: i32): @@ -204,14 +144,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cls w0, w0 -; Inst 1: ret -; }} +; block0: +; cls w0, w0 +; ret function %c(i64) -> i64 { block0(v0: i64): @@ -219,14 +154,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cls x0, x0 -; Inst 1: ret -; }} +; block0: +; cls x0, x0 +; ret function %c(i128) -> i128 { block0(v0: i128): @@ -234,22 +164,17 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: cls x2, x0 -; Inst 1: cls x3, x1 -; Inst 2: eon x0, x1, x0 -; Inst 3: lsr x0, x0, #63 -; Inst 4: madd x0, x2, x0, x0 -; Inst 5: subs xzr, x3, #63 -; Inst 6: csel x0, x0, xzr, eq -; Inst 7: add x0, x0, x3 -; Inst 8: movz x1, #0 -; Inst 9: ret -; }} +; block0: +; cls x6, x0 +; cls x8, x1 +; eon x10, x1, x0 +; lsr x12, x10, #63 +; madd x14, x6, x12, x12 +; subs xzr, x8, #63 +; csel x1, x14, xzr, eq +; add x0, x1, x8 +; movz x1, #0 +; ret function %d(i8) -> i8 { block0(v0: i8): @@ -257,16 +182,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: rbit w0, w0 -; Inst 1: orr w0, w0, #8388608 -; Inst 2: clz w0, w0 -; Inst 3: ret -; }} +; block0: +; rbit w3, w0 +; orr w5, w3, #8388608 +; clz w0, w5 +; ret function %d(i16) -> i16 { block0(v0: i16): @@ -274,16 +194,11 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: rbit w0, w0 -; Inst 1: orr w0, w0, #32768 -; Inst 2: clz w0, w0 -; Inst 3: ret -; }} +; block0: +; rbit w3, w0 +; orr w5, w3, #32768 +; clz w0, w5 +; ret function %d(i32) -> i32 { block0(v0: i32): @@ -291,15 +206,10 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit w0, w0 -; Inst 1: clz w0, w0 -; Inst 2: ret -; }} +; block0: +; rbit w3, w0 +; clz w0, w3 +; ret function %d(i64) -> i64 { block0(v0: i64): @@ -307,15 +217,10 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit x0, x0 -; Inst 1: clz x0, x0 -; Inst 2: ret -; }} +; block0: +; rbit x3, x0 +; clz x0, x3 +; ret function %d(i128) -> i128 { block0(v0: i128): @@ -323,20 +228,15 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: rbit x0, x0 -; Inst 1: rbit x1, x1 -; Inst 2: clz x0, x0 -; Inst 3: clz x1, x1 -; Inst 4: lsr x2, x0, #6 -; Inst 5: madd x0, x1, x2, x0 -; Inst 6: movz x1, #0 -; Inst 7: ret -; }} +; block0: +; rbit x6, x0 +; rbit x8, x1 +; clz x10, x6 +; clz x12, x8 +; lsr x14, x10, #6 +; madd x0, x12, x14, x10 +; movz x1, #0 +; ret function %d(i128) -> i128 { block0(v0: i128): @@ -344,19 +244,19 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: fmov d0, x0 -; Inst 1: mov v0.d[1], x1 -; Inst 2: cnt v0.16b, v0.16b -; Inst 3: addv b0, v0.16b -; Inst 4: umov w0, v0.b[0] -; Inst 5: movz x1, #0 -; Inst 6: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp d11, d13, [sp, #-16]! +; block0: +; fmov d6, x0 +; mov v6.d[1], x1 +; cnt v11.16b, v6.16b +; addv b13, v11.16b +; umov w0, v13.b[0] +; movz x1, #0 +; ldp d11, d13, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %d(i64) -> i64 { block0(v0: i64): @@ -364,17 +264,12 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: fmov d0, x0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: addv b0, v0.8b -; Inst 3: umov w0, v0.b[0] -; Inst 4: ret -; }} +; block0: +; fmov d3, x0 +; cnt v5.8b, v3.8b +; addv b7, v5.8b +; umov w0, v7.b[0] +; ret function %d(i32) -> i32 { block0(v0: i32): @@ -382,17 +277,12 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: fmov s0, w0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: addv b0, v0.8b -; Inst 3: umov w0, v0.b[0] -; Inst 4: ret -; }} +; block0: +; fmov s3, w0 +; cnt v5.8b, v3.8b +; addv b7, v5.8b +; umov w0, v7.b[0] +; ret function %d(i16) -> i16 { block0(v0: i16): @@ -400,17 +290,12 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: fmov s0, w0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: addp v0.8b, v0.8b, v0.8b -; Inst 3: umov w0, v0.b[0] -; Inst 4: ret -; }} +; block0: +; fmov s3, w0 +; cnt v5.8b, v3.8b +; addp v7.8b, v5.8b, v5.8b +; umov w0, v7.b[0] +; ret function %d(i8) -> i8 { block0(v0: i8): @@ -418,16 +303,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: fmov s0, w0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: umov w0, v0.b[0] -; Inst 3: ret -; }} +; block0: +; fmov s3, w0 +; cnt v5.8b, v3.8b +; umov w0, v5.b[0] +; ret function %bextend_b8() -> b32 { block0: @@ -436,15 +316,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #255 -; Inst 1: sxtb w0, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #255 +; sxtb w0, w2 +; ret function %bextend_b1() -> b32 { block0: @@ -453,15 +328,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #1 -; Inst 1: sbfx w0, w0, #0, #1 -; Inst 2: ret -; }} +; block0: +; movz x2, #1 +; sbfx w0, w2, #0, #1 +; ret function %bnot_i32(i32) -> i32 { block0(v0: i32): @@ -469,14 +339,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn w0, wzr, w0 -; Inst 1: ret -; }} +; block0: +; orn w0, wzr, w0 +; ret function %bnot_i64(i64) -> i64 { block0(v0: i64): @@ -484,14 +349,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, xzr, x0 -; Inst 1: ret -; }} +; block0: +; orn x0, xzr, x0 +; ret function %bnot_i64_with_shift(i64) -> i64 { block0(v0: i64): @@ -501,14 +361,9 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, xzr, x0, LSL 3 -; Inst 1: ret -; }} +; block0: +; orn x0, xzr, x0, LSL 3 +; ret function %bnot_i128(i128) -> i128 { block0(v0: i128): @@ -516,15 +371,10 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orn x0, xzr, x0 -; Inst 1: orn x1, xzr, x1 -; Inst 2: ret -; }} +; block0: +; orn x0, xzr, x0 +; orn x1, xzr, x1 +; ret function %bnot_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): @@ -532,14 +382,9 @@ block0(v0: i8x16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mvn v0.16b, v0.16b -; Inst 1: ret -; }} +; block0: +; mvn v0.16b, v0.16b +; ret function %band_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -547,14 +392,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; and w0, w0, w1 +; ret function %band_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -562,14 +402,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1 +; ret function %band_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -577,15 +412,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: and x0, x0, x2 -; Inst 1: and x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; and x0, x0, x2 +; and x1, x1, x3 +; ret function %band_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -593,14 +423,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; and v0.16b, v0.16b, v1.16b +; ret function %band_i64_constant(i64) -> i64 { block0(v0: i64): @@ -609,14 +434,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, #3 +; ret function %band_i64_constant2(i64) -> i64 { block0(v0: i64): @@ -625,14 +445,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, #3 +; ret function %band_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -642,14 +457,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1, LSL 3 +; ret function %band_i64_constant_shift2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -659,14 +469,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1, LSL 3 +; ret function %bor_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -674,14 +479,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; orr w0, w0, w1 +; ret function %bor_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -689,14 +489,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1 +; ret function %bor_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -704,15 +499,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orr x0, x0, x2 -; Inst 1: orr x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; orr x0, x0, x2 +; orr x1, x1, x3 +; ret function %bor_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -720,14 +510,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; orr v0.16b, v0.16b, v1.16b +; ret function %bor_i64_constant(i64) -> i64 { block0(v0: i64): @@ -736,14 +521,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: orr x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, #3 +; ret function %bor_i64_constant2(i64) -> i64 { block0(v0: i64): @@ -752,14 +532,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, #3 +; ret function %bor_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -769,14 +544,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1, LSL 3 +; ret function %bor_i64_constant_shift2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -786,14 +556,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1, LSL 3 +; ret function %bxor_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -801,14 +566,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; eor w0, w0, w1 +; ret function %bxor_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -816,14 +576,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1 +; ret function %bxor_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -831,15 +586,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; eor x0, x0, x2 +; eor x1, x1, x3 +; ret function %bxor_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -847,14 +597,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; eor v0.16b, v0.16b, v1.16b +; ret function %bxor_i64_constant(i64) -> i64 { block0(v0: i64): @@ -863,14 +608,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, #3 +; ret function %bxor_i64_constant2(i64) -> i64 { block0(v0: i64): @@ -879,14 +619,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, #3 +; ret function %bxor_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -896,14 +631,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: eor x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1, LSL 3 +; ret function %bxor_i64_constant_shift2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -913,14 +643,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1, LSL 3 +; ret function %band_not_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -928,14 +653,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; bic w0, w0, w1 +; ret function %band_not_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -943,14 +663,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, x1 +; ret function %band_not_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -958,15 +673,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: bic x0, x0, x2 -; Inst 1: bic x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; bic x0, x0, x2 +; bic x1, x1, x3 +; ret function %band_not_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -974,14 +684,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; bic v0.16b, v0.16b, v1.16b +; ret function %band_not_i64_constant(i64) -> i64 { block0(v0: i64): @@ -990,14 +695,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, #4 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, #4 +; ret function %band_not_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1007,14 +707,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, x1, LSL 4 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, x1, LSL 4 +; ret function %bor_not_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1022,14 +717,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; orn w0, w0, w1 +; ret function %bor_not_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1037,14 +727,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, x1 +; ret function %bor_not_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1052,15 +737,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: orn x0, x0, x2 -; Inst 1: orn x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; orn x0, x0, x2 +; orn x1, x1, x3 +; ret function %bor_not_i64_constant(i64) -> i64 { block0(v0: i64): @@ -1069,14 +749,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, #4 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, #4 +; ret function %bor_not_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1086,14 +761,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, x1, LSL 4 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, x1, LSL 4 +; ret function %bxor_not_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1101,14 +771,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; eon w0, w0, w1 +; ret function %bxor_not_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1116,14 +781,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, x1 +; ret function %bxor_not_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1131,15 +791,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: eon x0, x0, x2 -; Inst 1: eon x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; eon x0, x0, x2 +; eon x1, x1, x3 +; ret function %bxor_not_i64_constant(i64) -> i64 { block0(v0: i64): @@ -1148,14 +803,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon x0, x0, #4 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, #4 +; ret function %bxor_not_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1165,14 +815,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon x0, x0, x1, LSL 4 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, x1, LSL 4 +; ret function %ishl_i128_i8(i128, i8) -> i128 { block0(v0: i128, v1: i8): @@ -1180,25 +825,17 @@ block0(v0: i128, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsl x4, x0, x2 -; Inst 1: lsl x3, x1, x2 -; Inst 2: orn w1, wzr, w2 -; Inst 3: lsr x0, x0, #1 -; Inst 4: lsr x0, x0, x1 -; Inst 5: orr x0, x3, x0 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, xzr, x4, ne -; Inst 8: csel x0, x4, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsl x8, x0, x2 +; lsl x10, x1, x2 +; orn w12, wzr, w2 +; lsr x14, x0, #1 +; lsr x0, x14, x12 +; orr x3, x10, x0 +; ands xzr, x2, #64 +; csel x0, xzr, x8, ne +; csel x1, x8, x3, ne +; ret function %ishl_i128_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1206,25 +843,17 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
13) -; Inst 0: lsl x3, x0, x2 -; Inst 1: lsl x1, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsr x0, x0, #1 -; Inst 4: lsr x0, x0, x4 -; Inst 5: orr x0, x1, x0 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, xzr, x3, ne -; Inst 8: csel x0, x3, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsl x10, x0, x2 +; lsl x12, x1, x2 +; orn w14, wzr, w2 +; lsr x0, x0, #1 +; lsr x3, x0, x14 +; orr x4, x12, x3 +; ands xzr, x2, #64 +; csel x0, xzr, x10, ne +; csel x1, x10, x4, ne +; ret function %ushr_i128_i8(i128, i8) -> i128 { block0(v0: i128, v1: i8): @@ -1232,25 +861,17 @@ block0(v0: i128, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: lsr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x1, x1, #1 -; Inst 4: lsl x1, x1, x4 -; Inst 5: orr x1, x3, x1 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, x0, x1, ne -; Inst 8: csel x0, xzr, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsr x8, x0, x2 +; lsr x10, x1, x2 +; orn w12, wzr, w2 +; lsl x14, x1, #1 +; lsl x0, x14, x12 +; orr x3, x8, x0 +; ands xzr, x2, #64 +; csel x0, x10, x3, ne +; csel x1, xzr, x10, ne +; ret function %ushr_i128_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1258,25 +879,17 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: lsr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x1, x1, #1 -; Inst 4: lsl x1, x1, x4 -; Inst 5: orr x1, x3, x1 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, x0, x1, ne -; Inst 8: csel x0, xzr, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsr x10, x0, x2 +; lsr x12, x1, x2 +; orn w14, wzr, w2 +; lsl x0, x1, #1 +; lsl x3, x0, x14 +; orr x4, x10, x3 +; ands xzr, x2, #64 +; csel x0, x12, x4, ne +; csel x1, xzr, x12, ne +; ret function %sshr_i128_i8(i128, i8) -> i128 { block0(v0: i128, v1: i8): @@ -1284,25 +897,18 @@ block0(v0: i128, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: asr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x5, x1, #1 -; Inst 4: lsl x4, x5, x4 -; Inst 5: asr x1, x1, #63 -; Inst 6: orr x3, x3, x4 -; Inst 7: ands xzr, x2, #64 -; Inst 8: csel x2, x0, x3, ne -; Inst 9: csel x0, x1, x0, ne -; Inst 10: mov x1, x0 -; Inst 11: mov x0, x2 -; Inst 12: ret -; }} +; block0: +; lsr x8, x0, x2 +; asr x10, x1, x2 +; orn w12, wzr, w2 +; lsl x14, x1, #1 +; lsl x0, x14, x12 +; asr x3, x1, #63 +; orr x4, x8, x0 +; ands xzr, x2, #64 +; csel x0, x10, x4, ne +; csel x1, x3, x10, ne +; ret function %sshr_i128_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1310,23 +916,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: asr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x5, x1, #1 -; Inst 4: lsl x4, x5, x4 -; Inst 5: asr x1, x1, #63 -; Inst 6: orr x3, x3, x4 -; Inst 7: ands xzr, x2, #64 -; Inst 8: csel x2, x0, x3, ne -; Inst 9: csel x0, x1, x0, ne -; Inst 10: mov x1, x0 -; Inst 11: mov x0, x2 -; Inst 12: ret -; }} +; block0: +; lsr x10, x0, x2 +; asr x12, x1, x2 +; orn w14, wzr, w2 +; lsl x0, x1, #1 +; lsl x3, x0, x14 +; asr x4, x1, #63 +; orr x6, x10, x3 +; ands xzr, x2, #64 +; csel x0, x12, x6, ne +; csel x1, x4, x12, ne +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif b/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif index 1d71b9038d..ff0dcd2da5 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif @@ -9,15 +9,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: blr x1 -; Inst 3: ldp fp, lr, [sp], #16 -; Inst 4: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; blr x1 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index 21ba70c987..41ed9a3e9d 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false set enable_probestack=false target aarch64 @@ -11,12 +11,13 @@ block0(v0: i64): return v1 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; ldp fp, lr, [sp], #16 +; ret function %f2(i32) -> i64 { fn0 = %g(i32 uext) -> i64 baldrdash_system_v @@ -26,20 +27,40 @@ block0(v0: i32): return v1 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; check: mov w0, w0 -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; check: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! +; block0: +; mov w0, w0 +; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %f3(i32) -> i32 uext baldrdash_system_v { block0(v0: i32): return v0 } -; check: mov w0, w0 +; block0: +; mov w0, w0 function %f4(i32) -> i64 { fn0 = %g(i32 sext) -> i64 baldrdash_system_v @@ -49,20 +70,40 @@ block0(v0: i32): return v1 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; check: sxtw x0, w0 -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; check: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! +; block0: +; sxtw x0, w0 +; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %f5(i32) -> i32 sext baldrdash_system_v { block0(v0: i32): return v0 } -; check: sxtw x0, w0 +; block0: +; sxtw x0, w0 function %f6(i8) -> i64 { fn0 = %g(i32, i32, i32, i32, i32, i32, i32, i32, i8 sext) -> i64 @@ -73,26 +114,27 @@ block0(v0: i8): return v2 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: mov x8, x0 -; nextln: sub sp, sp, #16 -; nextln: virtual_sp_offset_adjust 16 -; nextln: movz x0, #42 -; nextln: movz x1, #42 -; nextln: movz x2, #42 -; nextln: movz x3, #42 -; nextln: movz x4, #42 -; nextln: movz x5, #42 -; nextln: movz x6, #42 -; nextln: movz x7, #42 -; nextln: sturb w8, [sp] -; nextln: ldr x8, 8 ; b 12 ; data -; nextln: blr x8 -; nextln: add sp, sp, #16 -; nextln: virtual_sp_offset_adjust -16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x15, x0 +; sub sp, sp, #16 +; virtual_sp_offset_adjust 16 +; movz x0, #42 +; movz x1, #42 +; movz x2, #42 +; movz x3, #42 +; movz x4, #42 +; movz x5, #42 +; movz x6, #42 +; movz x7, #42 +; strb w15, [sp] +; ldr x15, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; add sp, sp, #16 +; virtual_sp_offset_adjust -16 +; ldp fp, lr, [sp], #16 +; ret function %f7(i8) -> i32, i32, i32, i32, i32, i32, i32, i32, i8 sext { block0(v0: i8): @@ -100,18 +142,19 @@ block0(v0: i8): return v1, v1, v1, v1, v1, v1, v1, v1, v0 } -; check: mov x9, x0 -; nextln: mov x8, x1 -; nextln: movz x0, #42 -; nextln: movz x1, #42 -; nextln: movz x2, #42 -; nextln: movz x3, #42 -; nextln: movz x4, #42 -; nextln: movz x5, #42 -; nextln: movz x6, #42 -; nextln: movz x7, #42 -; nextln: sturb w9, [x8] -; nextln: ret +; block0: +; mov x14, x0 +; mov x8, x1 +; movz x0, #42 +; movz x1, #42 +; movz x2, #42 +; movz x3, #42 +; movz x4, #42 +; movz x5, #42 +; movz x6, #42 +; movz x7, #42 +; strb w14, [x8] +; ret function %f8() { fn0 = %g0() -> f32 @@ -131,32 +174,33 @@ block0: return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: add sp, sp, #48 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; sub sp, sp, #48 +; block0: +; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x9 +; str q0, [sp] +; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x11 +; str q0, [sp, #16] +; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x13 +; str q0, [sp, #32] +; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; ldr q0, [sp] +; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x1 +; ldr q0, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x3 +; ldr q0, [sp, #32] +; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; add sp, sp, #48 +; ldp fp, lr, [sp], #16 +; ret function %f9() { fn0 = %g0() -> i8x16 @@ -174,32 +218,33 @@ block0: return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: add sp, sp, #48 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #48 +; block0: +; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x9 +; str q0, [sp] +; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x11 +; str q0, [sp, #16] +; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x13 +; str q0, [sp, #32] +; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; ldr q0, [sp] +; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x1 +; ldr q0, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x3 +; ldr q0, [sp, #32] +; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; add sp, sp, #48 +; ldp fp, lr, [sp], #16 +; ret function %f10() { fn0 = %g0() -> f32 @@ -221,44 +266,43 @@ block0: return } -; check: stp fp, lr, [sp, #-16]! 
-; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: add sp, sp, #48 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #48 +; block0: +; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x9 +; str q0, [sp] +; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x11 +; str q0, [sp, #16] +; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x13 +; str q0, [sp, #32] +; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; ldr q0, [sp] +; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x1 +; ldr q0, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x3 +; ldr q0, [sp, #32] +; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; add sp, sp, #48 +; ldp fp, lr, [sp], #16 +; ret - -; i128 tests function %f11(i128, i64) -> i64 { block0(v0: i128, v1: i64): v2, v3 = isplit v0 return v3 } -; check: mov x0, x1 -; nextln: ret - +; block0: +; mov x0, x1 +; ret function %f11_call(i64) -> i64 { fn0 = %f11(i128, i64) -> i64 @@ -270,28 +314,27 @@ block0(v0: i64): return v3 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: mov x1, x0 -; nextln: movz x0, #42 -; nextln: movz x2, #42 -; nextln: ldr x3, 8 ; b 12 ; data -; nextln: blr x3 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x9, x0 +; movz x0, #42 +; mov x1, x9 +; movz x2, #42 +; ldr x14, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x14 +; ldp fp, lr, [sp], #16 +; ret - -; The AArch64 ABI requires that the i128 argument be aligned -; and to be passed in x2 and x3 function %f12(i64, i128) -> i64 { block0(v0: i64, v1: i128): v2, v3 = isplit v1 return v2 } -; check: mov x0, x2 -; nextln: ret - +; block0: +; mov x0, x2 +; ret function %f12_call(i64) -> i64 { fn0 = %f12(i64, i128) -> i64 @@ -303,29 +346,26 @@ block0(v0: i64): return v3 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz x3, #42 -; nextln: mov x2, x0 -; nextln: movz x0, #42 -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; block0: +; movz x3, #42 +; mov x2, x0 +; movz x0, #42 +; ldr x14, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x14 +; ldp fp, lr, [sp], #16 +; ret - - -; The Apple AArch64 ABI allows the i128 argument to not be aligned -; and to be passed in x1 and x2 function %f13(i64, i128) -> i64 apple_aarch64 { block0(v0: i64, v1: i128): v2, v3 = isplit v1 return v2 } -; check: mov x0, x1 -; nextln: ret - +; block0: +; mov x0, x1 +; ret function %f13_call(i64) -> i64 apple_aarch64 { fn0 = %f13(i64, i128) -> i64 apple_aarch64 @@ -337,31 +377,29 @@ block0(v0: i64): return v3 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz x2, #42 -; nextln: mov x1, x0 -; nextln: movz x0, #42 -; nextln: ldr x3, 8 ; b 12 ; data -; nextln: blr x3 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; movz x2, #42 +; mov x1, x0 +; movz x0, #42 +; ldr x14, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x14 +; ldp fp, lr, [sp], #16 +; ret - - -; We only have 8 registers to pass data in -; make sure we spill the last argument even though there is one slot available function %f14(i128, i128, i128, i64, i128) -> i128 { block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128): return v4 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: ldur x0, [fp, #16] -; nextln: ldur x1, [fp, #24] -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x0, [fp, #16] +; ldr x1, [fp, #24] +; ldp fp, lr, [sp], #16 +; ret function %f14_call(i128, i64) -> i128 { fn0 = %f14(i128, i128, i128, i64, i128) -> i128 @@ -371,50 +409,40 @@ block0(v0: i128, v1: i64): return v2 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x14, x2 +; sub sp, sp, #16 +; virtual_sp_offset_adjust 16 +; mov x13, x0 +; mov x15, x1 +; mov x2, x13 +; mov x3, x15 +; mov x4, x13 +; mov x5, x15 +; mov x6, x14 +; str x13, [sp] +; str x15, [sp, #8] +; ldr x7, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x7 +; add sp, sp, #16 +; virtual_sp_offset_adjust -16 +; ldp fp, lr, [sp], #16 +; ret -; TODO: Some codegen optimization possible here with x0,x1 moving to x7,x8 and then moving back -; nextln: mov x7, x0 -; nextln: mov x8, x1 -; nextln: mov x6, x2 -; nextln: sub sp, sp, #16 -; nextln: virtual_sp_offset_adjust 16 -; nextln: mov x0, x7 -; nextln: mov x1, x8 -; nextln: mov x2, x7 -; nextln: mov x3, x8 -; nextln: mov x4, x7 -; nextln: mov x5, x8 -; nextln: stur x7, [sp] -; nextln: stur x8, [sp, #8] - -; nextln: ldr x7, 8 ; b 12 ; data -; nextln: blr x7 -; nextln: add sp, sp, #16 -; nextln: virtual_sp_offset_adjust -16 - -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - - - -; We have one register slot available (Similar to %f14), however apple -; allows us to start i128 on non even numbered registers (x7 in this case). -; -; It is unspecified if we can split the i128 into x7 + the stack. -; In practice LLVM does not do this, so we are going to go with that. function %f15(i128, i128, i128, i64, i128) -> i128 apple_aarch64{ block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128): return v4 } -; check: stp fp, lr, [sp, #-16]! 
-; nextln: mov fp, sp -; nextln: ldur x0, [fp, #16] -; nextln: ldur x1, [fp, #24] -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x0, [fp, #16] +; ldr x1, [fp, #24] +; ldp fp, lr, [sp], #16 +; ret function %f15_call(i128, i64) -> i128 apple_aarch64 { fn0 = %f15(i128, i128, i128, i64, i128) -> i128 apple_aarch64 @@ -424,31 +452,27 @@ block0(v0: i128, v1: i64): return v2 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp - -; nextln: mov x7, x0 -; nextln: mov x8, x1 -; nextln: mov x6, x2 -; nextln: sub sp, sp, #16 -; nextln: virtual_sp_offset_adjust 16 -; nextln: mov x0, x7 -; nextln: mov x1, x8 -; nextln: mov x2, x7 -; nextln: mov x3, x8 -; nextln: mov x4, x7 -; nextln: mov x5, x8 -; nextln: stur x7, [sp] -; nextln: stur x8, [sp, #8] - -; nextln: ldr x7, 8 ; b 12 ; data -; nextln: blr x7 -; nextln: add sp, sp, #16 -; nextln: virtual_sp_offset_adjust -16 - -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x14, x2 +; sub sp, sp, #16 +; virtual_sp_offset_adjust 16 +; mov x13, x0 +; mov x15, x1 +; mov x2, x13 +; mov x3, x15 +; mov x4, x13 +; mov x5, x15 +; mov x6, x14 +; str x13, [sp] +; str x15, [sp, #8] +; ldr x7, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x7 +; add sp, sp, #16 +; virtual_sp_offset_adjust -16 +; ldp fp, lr, [sp], #16 +; ret function %f16() -> i32, i32 wasmtime_system_v { block0: @@ -457,9 +481,10 @@ block0: return v0, v1 } -; check: mov x1, x0 -; nextln: movz x0, #0 -; nextln: movz x2, #1 -; nextln: stur w2, [x1] -; nextln: ret +; block0: +; mov x11, x0 +; movz x0, #0 +; movz x7, #1 +; str w7, [x11] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif b/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif index 1828d811e4..122ea536a4 100644 --- a/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif +++ b/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif @@ -10,14 +10,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmeq v0.16b, v0.16b, #0 -; Inst 1: ret -; }} +; block0: +; cmeq v0.16b, v0.16b, #0 +; ret function %f1(i16x8) -> b16x8 { block0(v0: i16x8): @@ -27,14 +22,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmeq v0.8h, v0.8h, #0 -; Inst 1: ret -; }} +; block0: +; cmeq v0.8h, v0.8h, #0 +; ret function %f2(i32x4) -> b32x4 { block0(v0: i32x4): @@ -44,15 +34,10 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: cmeq v0.4s, v0.4s, #0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; cmeq v3.4s, v0.4s, #0 +; mvn v0.16b, v3.16b +; ret function %f3(i64x2) -> b64x2 { block0(v0: i64x2): @@ -62,15 +47,10 @@ block0(v0: i64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: cmeq v0.2d, v0.2d, #0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; cmeq v3.2d, v0.2d, #0 +; mvn v0.16b, v3.16b +; ret function %f4(i8x16) -> b8x16 { block0(v0: i8x16): @@ -80,14 +60,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmle v0.16b, v0.16b, #0 -; Inst 1: ret -; }} +; block0: +; cmle v0.16b, v0.16b, #0 +; ret function %f5(i16x8) -> b16x8 { block0(v0: i16x8): @@ -97,14 +72,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmge v0.8h, v0.8h, #0 -; Inst 1: ret -; }} +; block0: +; cmge v0.8h, v0.8h, #0 +; ret function %f6(i32x4) -> b32x4 { block0(v0: i32x4): @@ -114,14 +84,9 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmge v0.4s, v0.4s, #0 -; Inst 1: ret -; }} +; block0: +; cmge v0.4s, v0.4s, #0 +; ret function %f7(i64x2) -> b64x2 { block0(v0: i64x2): @@ -131,14 +96,9 @@ block0(v0: i64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmle v0.2d, v0.2d, #0 -; Inst 1: ret -; }} +; block0: +; cmle v0.2d, v0.2d, #0 +; ret function %f8(i8x16) -> b8x16 { block0(v0: i8x16): @@ -148,14 +108,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmlt v0.16b, v0.16b, #0 -; Inst 1: ret -; }} +; block0: +; cmlt v0.16b, v0.16b, #0 +; ret function %f9(i16x8) -> b16x8 { block0(v0: i16x8): @@ -165,14 +120,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmgt v0.8h, v0.8h, #0 -; Inst 1: ret -; }} +; block0: +; cmgt v0.8h, v0.8h, #0 +; ret function %f10(i32x4) -> b32x4 { block0(v0: i32x4): @@ -182,14 +132,9 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmgt v0.4s, v0.4s, #0 -; Inst 1: ret -; }} +; block0: +; cmgt v0.4s, v0.4s, #0 +; ret function %f11(i64x2) -> b64x2 { block0(v0: i64x2): @@ -199,14 +144,9 @@ block0(v0: i64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmlt v0.2d, v0.2d, #0 -; Inst 1: ret -; }} +; block0: +; cmlt v0.2d, v0.2d, #0 +; ret function %f12(f32x4) -> b32x4 { block0(v0: f32x4): @@ -216,14 +156,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmeq v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmeq v0.4s, v0.4s, #0.0 +; ret function %f13(f64x2) -> b64x2 { block0(v0: f64x2): @@ -233,14 +168,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmeq v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmeq v0.2d, v0.2d, #0.0 +; ret function %f14(f64x2) -> b64x2 { block0(v0: f64x2): @@ -250,15 +180,10 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: fcmeq v0.2d, v0.2d, #0.0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; fcmeq v3.2d, v0.2d, #0.0 +; mvn v0.16b, v3.16b +; ret function %f15(f32x4) -> b32x4 { block0(v0: f32x4): @@ -268,15 +193,10 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: fcmeq v0.4s, v0.4s, #0.0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; fcmeq v3.4s, v0.4s, #0.0 +; mvn v0.16b, v3.16b +; ret function %f16(f32x4) -> b32x4 { block0(v0: f32x4): @@ -286,14 +206,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmle v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmle v0.4s, v0.4s, #0.0 +; ret function %f17(f64x2) -> b64x2 { block0(v0: f64x2): @@ -303,14 +218,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmge v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmge v0.2d, v0.2d, #0.0 +; ret function %f18(f64x2) -> b64x2 { block0(v0: f64x2): @@ -320,14 +230,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmge v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmge v0.2d, v0.2d, #0.0 +; ret function %f19(f32x4) -> b32x4 { block0(v0: f32x4): @@ -337,14 +242,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmle v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmle v0.4s, v0.4s, #0.0 +; ret function %f20(f32x4) -> b32x4 { block0(v0: f32x4): @@ -354,14 +254,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmlt v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmlt v0.4s, v0.4s, #0.0 +; ret function %f21(f64x2) -> b64x2 { block0(v0: f64x2): @@ -371,14 +266,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmgt v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmgt v0.2d, v0.2d, #0.0 +; ret function %f22(f64x2) -> b64x2 { block0(v0: f64x2): @@ -388,14 +278,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmgt v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmgt v0.2d, v0.2d, #0.0 +; ret function %f23(f32x4) -> b32x4 { block0(v0: f32x4): @@ -405,11 +290,7 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: fcmlt v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmlt v0.4s, v0.4s, #0.0 +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index 1b61291530..9195757667 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -8,15 +8,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs xzr, x0, x1 -; Inst 1: cset x0, eq -; Inst 2: ret -; }} +; block0: +; subs xzr, x0, x1 +; cset x0, eq +; ret function %icmp_eq_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -24,17 +19,12 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: cset x0, eq -; Inst 4: ret -; }} +; block0: +; eor x10, x0, x2 +; eor x12, x1, x3 +; adds xzr, x10, x12 +; cset x0, eq +; ret function %icmp_ne_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -42,17 +32,12 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: cset x0, ne -; Inst 4: ret -; }} +; block0: +; eor x10, x0, x2 +; eor x12, x1, x3 +; adds xzr, x10, x12 +; cset x0, ne +; ret function %icmp_slt_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -60,18 +45,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, lo +; subs xzr, x1, x3 +; cset x14, lt +; csel x0, x11, x14, eq +; ret function %icmp_ult_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -79,18 +59,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lo -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, lo +; subs xzr, x1, x3 +; cset x14, lo +; csel x0, x11, x14, eq +; ret function %icmp_sle_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -98,18 +73,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, le -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, ls +; subs xzr, x1, x3 +; cset x14, le +; csel x0, x11, x14, eq +; ret function %icmp_ule_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -117,18 +87,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ls -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, ls +; subs xzr, x1, x3 +; cset x14, ls +; csel x0, x11, x14, eq +; ret function %icmp_sgt_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -136,18 +101,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, gt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hi +; subs xzr, x1, x3 +; cset x14, gt +; csel x0, x11, x14, eq +; ret function %icmp_ugt_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -155,18 +115,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hi -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hi +; subs xzr, x1, x3 +; cset x14, hi +; csel x0, x11, x14, eq +; ret function %icmp_sge_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -174,18 +129,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ge -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hs +; subs xzr, x1, x3 +; cset x14, ge +; csel x0, x11, x14, eq +; ret function %icmp_uge_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -193,18 +143,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hs -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hs +; subs xzr, x1, x3 +; cset x14, hs +; csel x0, x11, x14, eq +; ret function %icmp_of_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -212,16 +157,11 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: cset x0, vs -; Inst 3: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; cset x0, vs +; ret function %icmp_nof_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -229,16 +169,11 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: cset x0, vc -; Inst 3: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; cset x0, vc +; ret function %f(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -255,26 +190,15 @@ block2: return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 
2) -; Inst 0: subs xzr, x0, x1 -; Inst 1: b.eq label1 ; b label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 2 .. 4) -; Inst 2: movz x0, #1 -; Inst 3: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 4 .. 6) -; Inst 4: movz x0, #2 -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x1 +; b.eq label1 ; b label2 +; block1: +; movz x0, #1 +; ret +; block2: +; movz x0, #2 +; ret function %f(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -287,29 +211,16 @@ block1: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 2) -; Inst 0: subs xzr, x0, x1 -; Inst 1: b.eq label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 2 .. 3) -; Inst 2: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 4 .. 6) -; Inst 4: movz x0, #1 -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x1 +; b.eq label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; movz x0, #1 +; ret function %i128_brz(i128){ block0(v0: i128): @@ -321,28 +232,15 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: cbz x0, label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 2 .. 3) -; Inst 2: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 4 .. 5) -; Inst 4: ret -; }} +; block0: +; orr x4, x0, x1 +; cbz x4, label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_brnz(i128){ block0(v0: i128): @@ -354,28 +252,15 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: cbnz x0, label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 2 .. 3) -; Inst 2: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 4 .. 5) -; Inst 4: ret -; }} +; block0: +; orr x4, x0, x1 +; cbnz x4, label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_eq(i128, i128) { block0(v0: i128, v1: i128): @@ -386,30 +271,17 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: b.eq label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 5 .. 6) -; Inst 5: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 6 .. 
7) -; Inst 6: ret -; }} +; block0: +; eor x8, x0, x2 +; eor x10, x1, x3 +; adds xzr, x8, x10 +; b.eq label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ne(i128, i128) { block0(v0: i128, v1: i128): @@ -420,30 +292,17 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: b.ne label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 5 .. 6) -; Inst 5: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 6 .. 7) -; Inst 6: ret -; }} +; block0: +; eor x8, x0, x2 +; eor x10, x1, x3 +; adds xzr, x8, x10 +; b.ne label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_slt(i128, i128) { block0(v0: i128, v1: i128): @@ -454,33 +313,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, xzr, x0 -; Inst 6: b.lt label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, lo +; subs xzr, x1, x3 +; cset x12, lt +; csel x9, x9, x12, eq +; subs xzr, xzr, x9 +; b.lt label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ult(i128, i128) { block0(v0: i128, v1: i128): @@ -491,33 +337,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lo -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, xzr, x0 -; Inst 6: b.lo label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, lo +; subs xzr, x1, x3 +; cset x12, lo +; csel x9, x9, x12, eq +; subs xzr, xzr, x9 +; b.lo label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_sle(i128, i128) { block0(v0: i128, v1: i128): @@ -528,34 +361,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, le -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x1, x0 -; Inst 7: b.le label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 
10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, ls +; subs xzr, x1, x3 +; cset x12, le +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x12, x9 +; b.le label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ule(i128, i128) { block0(v0: i128, v1: i128): @@ -566,34 +386,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ls -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x1, x0 -; Inst 7: b.ls label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, ls +; subs xzr, x1, x3 +; cset x12, ls +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x12, x9 +; b.ls label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_sgt(i128, i128) { block0(v0: i128, v1: i128): @@ -604,33 +411,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, gt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, x0, xzr -; Inst 6: b.gt label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hi +; subs xzr, x1, x3 +; cset x12, gt +; csel x9, x9, x12, eq +; subs xzr, x9, xzr +; b.gt label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ugt(i128, i128) { block0(v0: i128, v1: i128): @@ -641,33 +435,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hi -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, x0, xzr -; Inst 6: b.hi label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hi +; subs xzr, x1, x3 +; cset x12, hi +; csel x9, x9, x12, eq +; subs xzr, x9, xzr +; b.hi label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_sge(i128, i128) { block0(v0: i128, v1: i128): @@ -678,34 +459,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 
8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ge -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x0, x1 -; Inst 7: b.ge label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hs +; subs xzr, x1, x3 +; cset x12, ge +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x9, x12 +; b.ge label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_uge(i128, i128) { block0(v0: i128, v1: i128): @@ -716,34 +484,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hs -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x0, x1 -; Inst 7: b.hs label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hs +; subs xzr, x1, x3 +; cset x12, hs +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x9, x12 +; b.hs label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_of(i128, i128) { block0(v0: i128, v1: i128): @@ -754,29 +509,16 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 3) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: b.vs label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 5 .. 6) -; Inst 5: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; b.vs label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_nof(i128, i128) { block0(v0: i128, v1: i128): @@ -787,27 +529,14 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 3) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: b.vc label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 5 .. 
6) -; Inst 5: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; b.vc label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/condops.clif b/cranelift/filetests/filetests/isa/aarch64/condops.clif index 9ab7573d95..6813b270e9 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condops.clif @@ -10,16 +10,11 @@ block0(v0: i8, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: subs wzr, w0, #42 -; Inst 2: csel x0, x1, x2, eq -; Inst 3: ret -; }} +; block0: +; uxtb w8, w0 +; subs wzr, w8, #42 +; csel x0, x1, x2, eq +; ret function %g(i8) -> b1 { block0(v0: i8): @@ -29,16 +24,11 @@ block0(v0: i8): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: subs wzr, w0, #42 -; Inst 2: cset x0, eq -; Inst 3: ret -; }} +; block0: +; uxtb w4, w0 +; subs wzr, w4, #42 +; cset x0, eq +; ret function %h(i8, i8, i8) -> i8 { block0(v0: i8, v1: i8, v2: i8): @@ -46,16 +36,11 @@ block0(v0: i8, v1: i8, v2: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: and x1, x1, x0 -; Inst 1: bic x0, x2, x0 -; Inst 2: orr x0, x0, x1 -; Inst 3: ret -; }} +; block0: +; and x8, x1, x0 +; bic x0, x2, x0 +; orr x0, x0, x8 +; ret function %i(b1, i8, i8) -> i8 { block0(v0: b1, v1: i8, v2: i8): @@ -63,16 +48,11 @@ block0(v0: b1, v1: i8, v2: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: and w0, w0, #1 -; Inst 1: subs wzr, w0, wzr -; Inst 2: csel x0, x1, x2, ne -; Inst 3: ret -; }} +; block0: +; and w8, w0, #1 +; subs wzr, w8, wzr +; csel x0, x1, x2, ne +; ret function %i(i32, i8, i8) -> i8 { block0(v0: i32, v1: i8, v2: i8): @@ -82,15 +62,10 @@ block0(v0: i32, v1: i8, v2: i8): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs wzr, w0, #42 -; Inst 1: csel x0, x1, x2, eq -; Inst 2: ret -; }} +; block0: +; subs wzr, w0, #42 +; csel x0, x1, x2, eq +; ret function %i128_select(b1, i128, i128) -> i128 { block0(v0: b1, v1: i128, v2: i128): @@ -98,15 +73,10 @@ block0(v0: b1, v1: i128, v2: i128): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: and w0, w0, #1 -; Inst 1: subs wzr, w0, wzr -; Inst 2: csel x0, x2, x4, ne -; Inst 3: csel x1, x3, x5, ne -; Inst 4: ret -; }} +; block0: +; and w14, w0, #1 +; subs wzr, w14, wzr +; csel x0, x2, x4, ne +; csel x1, x3, x5, ne +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/constants.clif b/cranelift/filetests/filetests/isa/aarch64/constants.clif index 9357a75c76..130ecdd475 100644 --- a/cranelift/filetests/filetests/isa/aarch64/constants.clif +++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif @@ -8,14 +8,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: movz x0, #255 -; Inst 1: ret -; }} +; block0: +; movz x0, #255 +; ret function %f() -> b16 { block0: @@ -23,14 +18,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #0 -; Inst 1: ret -; }} +; block0: +; movz x0, #0 +; ret function %f() -> i64 { block0: @@ -38,14 +28,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #0 -; Inst 1: ret -; }} +; block0: +; movz x0, #0 +; ret function %f() -> i64 { block0: @@ -53,14 +38,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535 +; ret function %f() -> i64 { block0: @@ -68,14 +48,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535, LSL #16 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535, LSL #16 +; ret function %f() -> i64 { block0: @@ -83,14 +58,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535, LSL #32 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535, LSL #32 +; ret function %f() -> i64 { block0: @@ -98,14 +68,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535, LSL #48 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535, LSL #48 +; ret function %f() -> i64 { block0: @@ -113,14 +78,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #0 -; Inst 1: ret -; }} +; block0: +; movn x0, #0 +; ret function %f() -> i64 { block0: @@ -128,14 +88,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535 +; ret function %f() -> i64 { block0: @@ -143,14 +98,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535, LSL #16 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535, LSL #16 +; ret function %f() -> i64 { block0: @@ -158,14 +108,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535, LSL #32 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535, LSL #32 +; ret function %f() -> i64 { block0: @@ -173,14 +118,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535, LSL #48 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535, LSL #48 +; ret function %f() -> i64 { block0: @@ -188,17 +128,12 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: movz x0, #58 -; Inst 1: movk x0, #4626, LSL #16 -; Inst 2: movk x0, #61603, LSL #32 -; Inst 3: movk x0, #62283, LSL #48 -; Inst 4: ret -; }} +; block0: +; movz x0, #58 +; movk x0, #4626, LSL #16 +; movk x0, #61603, LSL #32 +; movk x0, #62283, LSL #48 +; ret function %f() -> i64 { block0: @@ -206,15 +141,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #7924, LSL #16 -; Inst 1: movk x0, #4841, LSL #48 -; Inst 2: ret -; }} +; block0: +; movz x0, #7924, LSL #16 +; movk x0, #4841, LSL #48 +; ret function %f() -> i64 { block0: @@ -222,15 +152,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movn x0, #57611, LSL #16 -; Inst 1: movk x0, #4841, LSL #48 -; Inst 2: ret -; }} +; block0: +; movn x0, #57611, LSL #16 +; movk x0, #4841, LSL #48 +; ret function %f() -> i32 { block0: @@ -238,14 +163,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, xzr, #4294967295 -; Inst 1: ret -; }} +; block0: +; orr x0, xzr, #4294967295 +; ret function %f() -> i32 { block0: @@ -253,14 +173,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn w0, #8 -; Inst 1: ret -; }} +; block0: +; movn w0, #8 +; ret function %f() -> i64 { block0: @@ -268,14 +183,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn w0, #8 -; Inst 1: ret -; }} +; block0: +; movn w0, #8 +; ret function %f() -> i64 { block0: @@ -283,14 +193,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #8 -; Inst 1: ret -; }} +; block0: +; movn x0, #8 +; ret function %f() -> f64 { block0: @@ -298,14 +203,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov d0, #1 -; Inst 1: ret -; }} +; block0: +; fmov d0, #1 +; ret function %f() -> f32 { block0: @@ -313,14 +213,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov s0, #5 -; Inst 1: ret -; }} +; block0: +; fmov s0, #5 +; ret function %f() -> f64 { block0: @@ -328,15 +223,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #16457, LSL #48 -; Inst 1: fmov d0, x0 -; Inst 2: ret -; }} +; block0: +; movz x2, #16457, LSL #48 +; fmov d0, x2 +; ret function %f() -> f32 { block0: @@ -344,15 +234,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #16968, LSL #16 -; Inst 1: fmov s0, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #16968, LSL #16 +; fmov s0, w2 +; ret function %f() -> f64 { block0: @@ -360,14 +245,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: movi v0.2s, #0 -; Inst 1: ret -; }} +; block0: +; movi v0.2s, #0 +; ret function %f() -> f32 { block0: @@ -375,14 +255,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movi v0.2s, #0 -; Inst 1: ret -; }} +; block0: +; movi v0.2s, #0 +; ret function %f() -> f64 { block0: @@ -390,14 +265,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov d0, #-16 -; Inst 1: ret -; }} +; block0: +; fmov d0, #-16 +; ret function %f() -> f32 { block0: @@ -405,11 +275,7 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov s0, #-16 -; Inst 1: ret -; }} +; block0: +; fmov s0, #-16 +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif index 4e80cfe7b8..9b31cd20fc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif +++ b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif @@ -10,15 +10,10 @@ block0(v0: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtb x0, w0 -; Inst 1: add x0, x0, #42 -; Inst 2: ret -; }} +; block0: +; sxtb x4, w0 +; add x0, x4, #42 +; ret function %f2(i8, i64) -> i64 { block0(v0: i8, v1: i64): @@ -27,14 +22,9 @@ block0(v0: i8, v1: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x1, x0, SXTB -; Inst 1: ret -; }} +; block0: +; add x0, x1, x0, SXTB +; ret function %i128_uextend_i64(i64) -> i128 { block0(v0: i64): @@ -42,14 +32,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x1, #0 -; Inst 1: ret -; }} +; block0: +; movz x1, #0 +; ret function %i128_sextend_i64(i64) -> i128 { block0(v0: i64): @@ -57,14 +42,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr x1, x0, #63 -; Inst 1: ret -; }} +; block0: +; asr x1, x0, #63 +; ret function %i128_uextend_i32(i32) -> i128 { block0(v0: i32): @@ -72,15 +52,10 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov w0, w0 -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; mov w0, w0 +; movz x1, #0 +; ret function %i128_sextend_i32(i32) -> i128 { block0(v0: i32): @@ -88,15 +63,10 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtw x0, w0 -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; sxtw x0, w0 +; asr x1, x0, #63 +; ret function %i128_uextend_i16(i16) -> i128 { block0(v0: i16): @@ -104,15 +74,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: uxth w0, w0 -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; uxth w0, w0 +; movz x1, #0 +; ret function %i128_sextend_i16(i16) -> i128 { block0(v0: i16): @@ -120,15 +85,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxth x0, w0 -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; sxth x0, w0 +; asr x1, x0, #63 +; ret function %i128_uextend_i8(i8) -> i128 { block0(v0: i8): @@ -136,15 +96,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxtb w0, w0 -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; uxtb w0, w0 +; movz x1, #0 +; ret function %i128_sextend_i8(i8) -> i128 { block0(v0: i8): @@ -152,15 +107,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtb x0, w0 -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; sxtb x0, w0 +; asr x1, x0, #63 +; ret function %i8x16_uextend_i16(i8x16) -> i16 { block0(v0: i8x16): @@ -169,14 +119,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.b[1] +; ret function %i8x16_uextend_i32(i8x16) -> i32 { block0(v0: i8x16): @@ -185,14 +130,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.b[1] +; ret function %i8x16_uextend_i64(i8x16) -> i64 { block0(v0: i8x16): @@ -201,14 +141,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.b[1] +; ret function %i8x16_uextend_i128(i8x16) -> i128 { block0(v0: i8x16): @@ -217,15 +152,10 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: umov w0, v0.b[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; umov w0, v0.b[1] +; movz x1, #0 +; ret function %i8x16_sextend_i16(i8x16) -> i16 { block0(v0: i8x16): @@ -234,14 +164,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; smov w0, v0.b[1] +; ret function %i8x16_sextend_i32(i8x16) -> i32 { block0(v0: i8x16): @@ -250,14 +175,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; smov w0, v0.b[1] +; ret function %i8x16_sextend_i64(i8x16) -> i64 { block0(v0: i8x16): @@ -266,14 +186,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: smov x0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; smov x0, v0.b[1] +; ret function %i8x16_sextend_i128(i8x16) -> i128 { block0(v0: i8x16): @@ -282,15 +197,10 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: smov x0, v0.b[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; smov x0, v0.b[1] +; asr x1, x0, #63 +; ret function %i16x8_uextend_i32(i16x8) -> i32 { block0(v0: i16x8): @@ -299,14 +209,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.h[1] +; ret function %i16x8_uextend_i64(i16x8) -> i64 { block0(v0: i16x8): @@ -315,14 +220,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.h[1] +; ret function %i16x8_uextend_i128(i16x8) -> i128 { block0(v0: i16x8): @@ -331,15 +231,10 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: umov w0, v0.h[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; umov w0, v0.h[1] +; movz x1, #0 +; ret function %i16x8_sextend_i32(i16x8) -> i32 { block0(v0: i16x8): @@ -348,14 +243,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov w0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; smov w0, v0.h[1] +; ret function %i16x8_sextend_i64(i16x8) -> i64 { block0(v0: i16x8): @@ -364,14 +254,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov x0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; smov x0, v0.h[1] +; ret function %i16x8_sextend_i128(i16x8) -> i128 { block0(v0: i16x8): @@ -380,15 +265,10 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: smov x0, v0.h[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; smov x0, v0.h[1] +; asr x1, x0, #63 +; ret function %i32x4_uextend_i64(i32x4) -> i64 { block0(v0: i32x4): @@ -397,14 +277,9 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mov w0, v0.s[1] -; Inst 1: ret -; }} +; block0: +; mov w0, v0.s[1] +; ret function %i32x4_uextend_i128(i32x4) -> i128 { block0(v0: i32x4): @@ -413,15 +288,10 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov w0, v0.s[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; mov w0, v0.s[1] +; movz x1, #0 +; ret function %i32x4_sextend_i64(i32x4) -> i64 { block0(v0: i32x4): @@ -430,14 +300,9 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: smov x0, v0.s[1] -; Inst 1: ret -; }} +; block0: +; smov x0, v0.s[1] +; ret function %i32x4_sextend_i128(i32x4) -> i128 { block0(v0: i32x4): @@ -446,15 +311,10 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: smov x0, v0.s[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; smov x0, v0.s[1] +; asr x1, x0, #63 +; ret function %i64x2_uextend_i128(i64x2) -> i128 { block0(v0: i64x2): @@ -463,15 +323,10 @@ block0(v0: i64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov x0, v0.d[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; mov x0, v0.d[1] +; movz x1, #0 +; ret function %i64x2_sextend_i128(i64x2) -> i128 { block0(v0: i64x2): @@ -480,13 +335,8 @@ block0(v0: i64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov x0, v0.d[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; mov x0, v0.d[1] +; asr x1, x0, #63 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif index d35b6ef54d..0755c94feb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif @@ -8,15 +8,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxtb w0, w0 -; Inst 1: ucvtf s0, w0 -; Inst 2: ret -; }} +; block0: +; uxtb w4, w0 +; ucvtf s0, w4 +; ret function u0:0(i8) -> f64 { block0(v0: i8): @@ -24,15 +19,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxtb w0, w0 -; Inst 1: ucvtf d0, w0 -; Inst 2: ret -; }} +; block0: +; uxtb w4, w0 +; ucvtf d0, w4 +; ret function u0:0(i16) -> f32 { block0(v0: i16): @@ -40,15 +30,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxth w0, w0 -; Inst 1: ucvtf s0, w0 -; Inst 2: ret -; }} +; block0: +; uxth w4, w0 +; ucvtf s0, w4 +; ret function u0:0(i16) -> f64 { block0(v0: i16): @@ -56,15 +41,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxth w0, w0 -; Inst 1: ucvtf d0, w0 -; Inst 2: ret -; }} +; block0: +; uxth w4, w0 +; ucvtf d0, w4 +; ret function u0:0(f32) -> i8 { block0(v0: f32): @@ -72,23 +52,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #17280, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #17280, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu w0, s0 +; ret function u0:0(f64) -> i8 { block0(v0: f64): @@ -96,23 +71,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16496, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16496, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu w0, d0 +; ret function u0:0(f32) -> i16 { block0(v0: f32): @@ -120,23 +90,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #18304, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #18304, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu w0, s0 +; ret function u0:0(f64) -> i16 { block0(v0: f64): @@ -144,21 +109,16 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16624, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16624, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu w0, d0 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 16b34f759f..a8c3cb8191 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -8,14 +8,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fadd s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fadd s0, s0, s1 +; ret function %f2(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -23,14 +18,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: fadd d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fadd d0, d0, d1 +; ret function %f3(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -38,14 +28,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fsub s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fsub s0, s0, s1 +; ret function %f4(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -53,14 +38,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fsub d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fsub d0, d0, d1 +; ret function %f5(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -68,14 +48,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmul s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fmul s0, s0, s1 +; ret function %f6(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -83,14 +58,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmul d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fmul d0, d0, d1 +; ret function %f7(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -98,14 +68,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fdiv s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fdiv s0, s0, s1 +; ret function %f8(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -113,14 +78,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fdiv d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fdiv d0, d0, d1 +; ret function %f9(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -128,14 +88,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmin s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fmin s0, s0, s1 +; ret function %f10(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -143,14 +98,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmin d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fmin d0, d0, d1 +; ret function %f11(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -158,14 +108,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmax s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fmax s0, s0, s1 +; ret function %f12(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -173,14 +118,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmax d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fmax d0, d0, d1 +; ret function %f13(f32) -> f32 { block0(v0: f32): @@ -188,14 +128,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: fsqrt s0, s0 -; Inst 1: ret -; }} +; block0: +; fsqrt s0, s0 +; ret function %f15(f64) -> f64 { block0(v0: f64): @@ -203,14 +138,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fsqrt d0, d0 -; Inst 1: ret -; }} +; block0: +; fsqrt d0, d0 +; ret function %f16(f32) -> f32 { block0(v0: f32): @@ -218,14 +148,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fabs s0, s0 -; Inst 1: ret -; }} +; block0: +; fabs s0, s0 +; ret function %f17(f64) -> f64 { block0(v0: f64): @@ -233,14 +158,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fabs d0, d0 -; Inst 1: ret -; }} +; block0: +; fabs d0, d0 +; ret function %f18(f32) -> f32 { block0(v0: f32): @@ -248,14 +168,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fneg s0, s0 -; Inst 1: ret -; }} +; block0: +; fneg s0, s0 +; ret function %f19(f64) -> f64 { block0(v0: f64): @@ -263,14 +178,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fneg d0, d0 -; Inst 1: ret -; }} +; block0: +; fneg d0, d0 +; ret function %f20(f32) -> f64 { block0(v0: f32): @@ -278,14 +188,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcvt d0, s0 -; Inst 1: ret -; }} +; block0: +; fcvt d0, s0 +; ret function %f21(f64) -> f32 { block0(v0: f64): @@ -293,14 +198,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcvt s0, d0 -; Inst 1: ret -; }} +; block0: +; fcvt s0, d0 +; ret function %f22(f32) -> f32 { block0(v0: f32): @@ -308,14 +208,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintp s0, s0 -; Inst 1: ret -; }} +; block0: +; frintp s0, s0 +; ret function %f22(f64) -> f64 { block0(v0: f64): @@ -323,14 +218,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintp d0, d0 -; Inst 1: ret -; }} +; block0: +; frintp d0, d0 +; ret function %f23(f32) -> f32 { block0(v0: f32): @@ -338,14 +228,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintm s0, s0 -; Inst 1: ret -; }} +; block0: +; frintm s0, s0 +; ret function %f24(f64) -> f64 { block0(v0: f64): @@ -353,14 +238,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintm d0, d0 -; Inst 1: ret -; }} +; block0: +; frintm d0, d0 +; ret function %f25(f32) -> f32 { block0(v0: f32): @@ -368,14 +248,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: frintz s0, s0 -; Inst 1: ret -; }} +; block0: +; frintz s0, s0 +; ret function %f26(f64) -> f64 { block0(v0: f64): @@ -383,14 +258,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintz d0, d0 -; Inst 1: ret -; }} +; block0: +; frintz d0, d0 +; ret function %f27(f32) -> f32 { block0(v0: f32): @@ -398,14 +268,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintn s0, s0 -; Inst 1: ret -; }} +; block0: +; frintn s0, s0 +; ret function %f28(f64) -> f64 { block0(v0: f64): @@ -413,14 +278,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintn d0, d0 -; Inst 1: ret -; }} +; block0: +; frintn d0, d0 +; ret function %f29(f32, f32, f32) -> f32 { block0(v0: f32, v1: f32, v2: f32): @@ -428,14 +288,9 @@ block0(v0: f32, v1: f32, v2: f32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmadd s0, s0, s1, s2 -; Inst 1: ret -; }} +; block0: +; fmadd s0, s0, s1, s2 +; ret function %f30(f64, f64, f64) -> f64 { block0(v0: f64, v1: f64, v2: f64): @@ -443,14 +298,9 @@ block0(v0: f64, v1: f64, v2: f64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmadd d0, d0, d1, d2 -; Inst 1: ret -; }} +; block0: +; fmadd d0, d0, d1, d2 +; ret function %f31(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -458,15 +308,10 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: ushr v1.2s, v1.2s, #31 -; Inst 1: sli v0.2s, v1.2s, #31 -; Inst 2: ret -; }} +; block0: +; ushr v7.2s, v1.2s, #31 +; sli v0.2s, v7.2s, #31 +; ret function %f32(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -474,15 +319,10 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: ushr d1, d1, #63 -; Inst 1: sli d0, d1, #63 -; Inst 2: ret -; }} +; block0: +; ushr d7, d1, #63 +; sli d0, d7, #63 +; ret function %f33(f32) -> i32 { block0(v0: f32): @@ -490,23 +330,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #20352, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #20352, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu w0, s0 +; ret function %f34(f32) -> i32 { block0(v0: f32): @@ -514,24 +349,19 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
12) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: movz x0, #52992, LSL #16 -; Inst 3: fmov s1, w0 -; Inst 4: fcmp s0, s1 -; Inst 5: b.ge 8 ; udf -; Inst 6: movz x0, #20224, LSL #16 -; Inst 7: fmov s1, w0 -; Inst 8: fcmp s0, s1 -; Inst 9: b.mi 8 ; udf -; Inst 10: fcvtzs w0, s0 -; Inst 11: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; movz x7, #52992, LSL #16 +; fmov s7, w7 +; fcmp s0, s7 +; b.ge 8 ; udf +; movz x12, #20224, LSL #16 +; fmov s7, w12 +; fcmp s0, s7 +; b.mi 8 ; udf +; fcvtzs w0, s0 +; ret function %f35(f32) -> i64 { block0(v0: f32): @@ -539,23 +369,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #24448, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu x0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #24448, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu x0, s0 +; ret function %f36(f32) -> i64 { block0(v0: f32): @@ -563,24 +388,19 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: movz x0, #57088, LSL #16 -; Inst 3: fmov s1, w0 -; Inst 4: fcmp s0, s1 -; Inst 5: b.ge 8 ; udf -; Inst 6: movz x0, #24320, LSL #16 -; Inst 7: fmov s1, w0 -; Inst 8: fcmp s0, s1 -; Inst 9: b.mi 8 ; udf -; Inst 10: fcvtzs x0, s0 -; Inst 11: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; movz x7, #57088, LSL #16 +; fmov s7, w7 +; fcmp s0, s7 +; b.ge 8 ; udf +; movz x12, #24320, LSL #16 +; fmov s7, w12 +; fcmp s0, s7 +; b.mi 8 ; udf +; fcvtzs x0, s0 +; ret function %f37(f64) -> i32 { block0(v0: f64): @@ -588,23 +408,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16880, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16880, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu w0, d0 +; ret function %f38(f64) -> i32 { block0(v0: f64): @@ -612,23 +427,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: ldr d1, pc+8 ; b 12 ; data.f64 -2147483649 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16864, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzs w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; ldr d6, pc+8 ; b 12 ; data.f64 -2147483649 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16864, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzs w0, d0 +; ret function %f39(f64) -> i64 { block0(v0: f64): @@ -636,23 +446,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #17392, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu x0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #17392, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu x0, d0 +; ret function %f40(f64) -> i64 { block0(v0: f64): @@ -660,24 +465,19 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: movz x0, #50144, LSL #48 -; Inst 3: fmov d1, x0 -; Inst 4: fcmp d0, d1 -; Inst 5: b.ge 8 ; udf -; Inst 6: movz x0, #17376, LSL #48 -; Inst 7: fmov d1, x0 -; Inst 8: fcmp d0, d1 -; Inst 9: b.mi 8 ; udf -; Inst 10: fcvtzs x0, d0 -; Inst 11: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; movz x7, #50144, LSL #48 +; fmov d7, x7 +; fcmp d0, d7 +; b.ge 8 ; udf +; movz x12, #17376, LSL #48 +; fmov d7, x12 +; fcmp d0, d7 +; b.mi 8 ; udf +; fcvtzs x0, d0 +; ret function %f41(i32) -> f32 { block0(v0: i32): @@ -685,14 +485,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf s0, w0 -; Inst 1: ret -; }} +; block0: +; ucvtf s0, w0 +; ret function %f42(i32) -> f32 { block0(v0: i32): @@ -700,14 +495,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: scvtf s0, w0 -; Inst 1: ret -; }} +; block0: +; scvtf s0, w0 +; ret function %f43(i64) -> f32 { block0(v0: i64): @@ -715,14 +505,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf s0, x0 -; Inst 1: ret -; }} +; block0: +; ucvtf s0, x0 +; ret function %f44(i64) -> f32 { block0(v0: i64): @@ -730,14 +515,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: scvtf s0, x0 -; Inst 1: ret -; }} +; block0: +; scvtf s0, x0 +; ret function %f45(i32) -> f64 { block0(v0: i32): @@ -745,14 +525,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf d0, w0 -; Inst 1: ret -; }} +; block0: +; ucvtf d0, w0 +; ret function %f46(i32) -> f64 { block0(v0: i32): @@ -760,14 +535,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: scvtf d0, w0 -; Inst 1: ret -; }} +; block0: +; scvtf d0, w0 +; ret function %f47(i64) -> f64 { block0(v0: i64): @@ -775,14 +545,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf d0, x0 -; Inst 1: ret -; }} +; block0: +; ucvtf d0, x0 +; ret function %f48(i64) -> f64 { block0(v0: i64): @@ -790,14 +555,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: scvtf d0, x0 -; Inst 1: ret -; }} +; block0: +; scvtf d0, x0 +; ret function %f49(f32) -> i32 { block0(v0: f32): @@ -805,21 +565,16 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: movz x0, #20352, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s2, s0, s1 -; Inst 3: movi v1.2s, #0 -; Inst 4: fmax s2, s2, s1 -; Inst 5: fcmp s0, s0 -; Inst 6: fcsel s0, s1, s2, ne -; Inst 7: fcvtzu w0, s0 -; Inst 8: ret -; }} +; block0: +; movz x6, #20352, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movi v5.2s, #0 +; fmax s7, s7, s5 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzu w0, s7 +; ret function %f50(f32) -> i32 { block0(v0: f32): @@ -827,23 +582,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: movz x0, #20224, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s1, s0, s1 -; Inst 3: movz x0, #52992, LSL #16 -; Inst 4: fmov s2, w0 -; Inst 5: fmax s1, s1, s2 -; Inst 6: movi v2.2s, #0 -; Inst 7: fcmp s0, s0 -; Inst 8: fcsel s0, s2, s1, ne -; Inst 9: fcvtzs w0, s0 -; Inst 10: ret -; }} +; block0: +; movz x6, #20224, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movz x10, #52992, LSL #16 +; fmov s5, w10 +; fmax s7, s7, s5 +; movi v5.2s, #0 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzs w0, s7 +; ret function %f51(f32) -> i64 { block0(v0: f32): @@ -851,21 +601,16 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: movz x0, #24448, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s2, s0, s1 -; Inst 3: movi v1.2s, #0 -; Inst 4: fmax s2, s2, s1 -; Inst 5: fcmp s0, s0 -; Inst 6: fcsel s0, s1, s2, ne -; Inst 7: fcvtzu x0, s0 -; Inst 8: ret -; }} +; block0: +; movz x6, #24448, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movi v5.2s, #0 +; fmax s7, s7, s5 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzu x0, s7 +; ret function %f52(f32) -> i64 { block0(v0: f32): @@ -873,23 +618,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: movz x0, #24320, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s1, s0, s1 -; Inst 3: movz x0, #57088, LSL #16 -; Inst 4: fmov s2, w0 -; Inst 5: fmax s1, s1, s2 -; Inst 6: movi v2.2s, #0 -; Inst 7: fcmp s0, s0 -; Inst 8: fcsel s0, s2, s1, ne -; Inst 9: fcvtzs x0, s0 -; Inst 10: ret -; }} +; block0: +; movz x6, #24320, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movz x10, #57088, LSL #16 +; fmov s5, w10 +; fmax s7, s7, s5 +; movi v5.2s, #0 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzs x0, s7 +; ret function %f53(f64) -> i32 { block0(v0: f64): @@ -897,20 +637,15 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: ldr d1, pc+8 ; b 12 ; data.f64 4294967295 -; Inst 1: fmin d2, d0, d1 -; Inst 2: movi v1.2s, #0 -; Inst 3: fmax d2, d2, d1 -; Inst 4: fcmp d0, d0 -; Inst 5: fcsel d0, d1, d2, ne -; Inst 6: fcvtzu w0, d0 -; Inst 7: ret -; }} +; block0: +; ldr d4, pc+8 ; b 12 ; data.f64 4294967295 +; fmin d6, d0, d4 +; movi v4.2s, #0 +; fmax d6, d6, d4 +; fcmp d0, d0 +; fcsel d6, d4, d6, ne +; fcvtzu w0, d6 +; ret function %f54(f64) -> i32 { block0(v0: f64): @@ -918,22 +653,17 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: ldr d1, pc+8 ; b 12 ; data.f64 2147483647 -; Inst 1: fmin d1, d0, d1 -; Inst 2: movz x0, #49632, LSL #48 -; Inst 3: fmov d2, x0 -; Inst 4: fmax d1, d1, d2 -; Inst 5: movi v2.2s, #0 -; Inst 6: fcmp d0, d0 -; Inst 7: fcsel d0, d2, d1, ne -; Inst 8: fcvtzs w0, d0 -; Inst 9: ret -; }} +; block0: +; ldr d4, pc+8 ; b 12 ; data.f64 2147483647 +; fmin d6, d0, d4 +; movz x8, #49632, LSL #48 +; fmov d4, x8 +; fmax d6, d6, d4 +; movi v4.2s, #0 +; fcmp d0, d0 +; fcsel d6, d4, d6, ne +; fcvtzs w0, d6 +; ret function %f55(f64) -> i64 { block0(v0: f64): @@ -941,21 +671,16 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: movz x0, #17392, LSL #48 -; Inst 1: fmov d1, x0 -; Inst 2: fmin d2, d0, d1 -; Inst 3: movi v1.2s, #0 -; Inst 4: fmax d2, d2, d1 -; Inst 5: fcmp d0, d0 -; Inst 6: fcsel d0, d1, d2, ne -; Inst 7: fcvtzu x0, d0 -; Inst 8: ret -; }} +; block0: +; movz x6, #17392, LSL #48 +; fmov d5, x6 +; fmin d7, d0, d5 +; movi v5.2s, #0 +; fmax d7, d7, d5 +; fcmp d0, d0 +; fcsel d7, d5, d7, ne +; fcvtzu x0, d7 +; ret function %f56(f64) -> i64 { block0(v0: f64): @@ -963,21 +688,16 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: movz x0, #17376, LSL #48 -; Inst 1: fmov d1, x0 -; Inst 2: fmin d1, d0, d1 -; Inst 3: movz x0, #50144, LSL #48 -; Inst 4: fmov d2, x0 -; Inst 5: fmax d1, d1, d2 -; Inst 6: movi v2.2s, #0 -; Inst 7: fcmp d0, d0 -; Inst 8: fcsel d0, d2, d1, ne -; Inst 9: fcvtzs x0, d0 -; Inst 10: ret -; }} +; block0: +; movz x6, #17376, LSL #48 +; fmov d5, x6 +; fmin d7, d0, d5 +; movz x10, #50144, LSL #48 +; fmov d5, x10 +; fmax d7, d7, d5 +; movi v5.2s, #0 +; fcmp d0, d0 +; fcsel d7, d5, d7, ne +; fcvtzs x0, d7 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif index f8227627d8..d353c80e1b 100644 --- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif @@ -13,31 +13,20 @@ block0(v0: i64, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 5) -; Inst 0: mov w2, w1 -; Inst 1: ldr x3, [x0] -; Inst 2: mov x3, x3 -; Inst 3: subs xzr, x2, x3 -; Inst 4: b.ls label1 ; b label2 -; Block 1: -; (original IR block: block2) -; (instruction range: 5 .. 10) -; Inst 5: add x0, x0, x1, UXTW -; Inst 6: subs xzr, x2, x3 -; Inst 7: movz x1, #0 -; Inst 8: csel x0, x1, x0, hi -; Inst 9: ret -; Block 2: -; (original IR block: block1) -; (instruction range: 10 .. 
11) -; Inst 10: udf -; }} +; block0: +; mov w10, w1 +; ldr x5, [x0] +; mov x11, x5 +; subs xzr, x10, x11 +; b.ls label1 ; b label2 +; block1: +; add x13, x0, x1, UXTW +; subs xzr, x10, x11 +; movz x14, #0 +; csel x0, x14, x13, hi +; ret +; block2: +; udf function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -48,27 +37,16 @@ block0(v0: i64, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 3) -; Inst 0: mov w2, w1 -; Inst 1: subs xzr, x2, #65536 -; Inst 2: b.ls label1 ; b label2 -; Block 1: -; (original IR block: block2) -; (instruction range: 3 .. 8) -; Inst 3: add x0, x0, x1, UXTW -; Inst 4: subs xzr, x2, #65536 -; Inst 5: movz x1, #0 -; Inst 6: csel x0, x1, x0, hi -; Inst 7: ret -; Block 2: -; (original IR block: block1) -; (instruction range: 8 .. 9) -; Inst 8: udf -; }} +; block0: +; mov w8, w1 +; subs xzr, x8, #65536 +; b.ls label1 ; b label2 +; block1: +; add x10, x0, x1, UXTW +; subs xzr, x8, #65536 +; movz x11, #0 +; csel x0, x11, x10, hi +; ret +; block2: +; udf diff --git a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif index 2dcf741ddf..b6be2e7bcb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif @@ -14,17 +14,12 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: movz x0, #56780 -; Inst 1: uxth w0, w0 -; Inst 2: movz x1, #56780 -; Inst 3: subs wzr, w0, w1, UXTH -; Inst 4: cset x0, ne -; Inst 5: and w0, w0, #1 -; Inst 6: ret -; }} +; block0: +; movz x3, #56780 +; uxth w5, w3 +; movz x7, #56780 +; subs wzr, w5, w7, UXTH +; cset x4, ne +; and w0, w4, #1 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif index f7ded6aa88..6ce538de43 100644 --- a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif +++ b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false target aarch64 @@ -29,16 +29,31 @@ block5(v5: i64): return v6 } -; check: subs wzr, w0, #3 -; nextln: b.hs label1 ; adr x1, pc+16 ; ldrsw x2, [x1, x0, LSL 2] ; add x1, x1, x2 ; br x1 ; jt_entries - -; check: movz x1, #1 -; nextln: b - -; check: movz x1, #2 -; nextln: b - -; check: movz x1, #3 - -; check: add x0, x0, x1 +; block0: +; emit_island 36 +; subs wzr, w0, #3 +; b.hs label1 ; adr x15, pc+16 ; ldrsw x1, [x15, x0, LSL 2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; block1: +; movz x5, #4 +; b label2 +; block2: +; b label9 +; block3: +; movz x5, #1 +; b label4 +; block4: +; b label9 +; block5: +; movz x5, #2 +; b label6 +; block6: +; b label9 +; block7: +; movz x5, #3 +; b label8 +; block8: +; b label9 +; block9: +; add x0, x0, x5 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif b/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif index c4aa72309c..d6782da015 100644 --- a/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif +++ b/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif @@ -10,13 +10,8 @@ block1: return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block1) -; 
(instruction range: 0 .. 3) -; Inst 0: movz x0, #1 -; Inst 1: movz x1, #2 -; Inst 2: ret -; }} +; block0: +; movz x0, #1 +; movz x1, #2 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif index 23a3fc1d6e..30373affab 100644 --- a/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif +++ b/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif @@ -8,14 +8,9 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret function %add16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -23,14 +18,9 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret function %add32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -38,14 +28,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret function %add32_8(i32, i8) -> i32 { block0(v0: i32, v1: i8): @@ -54,14 +39,9 @@ block0(v0: i32, v1: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1, SXTB -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1, SXTB +; ret function %add64_32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -70,12 +50,7 @@ block0(v0: i64, v1: i32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, x1, SXTW -; Inst 1: ret -; }} +; block0: +; add x0, x0, x1, SXTW +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/prologue.clif b/cranelift/filetests/filetests/isa/aarch64/prologue.clif index 40934abd65..6742f1f0f1 100644 --- a/cranelift/filetests/filetests/isa/aarch64/prologue.clif +++ b/cranelift/filetests/filetests/isa/aarch64/prologue.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false target aarch64 @@ -75,19 +75,85 @@ block0(v0: f64): return v62 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: stp d14, d15, [sp, #-16]! -; nextln: stp d12, d13, [sp, #-16]! -; nextln: stp d10, d11, [sp, #-16]! -; nextln: stp d8, d9, [sp, #-16]! - -; check: ldp d8, d9, [sp], #16 -; nextln: ldp d10, d11, [sp], #16 -; nextln: ldp d12, d13, [sp], #16 -; nextln: ldp d14, d15, [sp], #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! 
+; sub sp, sp, #16 +; block0: +; fadd d4, d0, d0 +; fadd d6, d0, d0 +; str q6, [sp] +; fadd d6, d0, d0 +; fadd d8, d0, d0 +; fadd d10, d0, d0 +; fadd d12, d0, d0 +; fadd d14, d0, d0 +; fadd d1, d0, d0 +; fadd d3, d0, d0 +; fadd d5, d0, d0 +; fadd d7, d0, d0 +; fadd d9, d0, d0 +; fadd d11, d0, d0 +; fadd d13, d0, d0 +; fadd d16, d0, d0 +; fadd d15, d0, d0 +; fadd d20, d0, d0 +; fadd d22, d0, d0 +; fadd d24, d0, d0 +; fadd d26, d0, d0 +; fadd d28, d0, d0 +; fadd d30, d0, d0 +; fadd d17, d0, d0 +; fadd d19, d0, d0 +; fadd d21, d0, d0 +; fadd d23, d0, d0 +; fadd d25, d0, d0 +; fadd d27, d0, d0 +; fadd d29, d0, d0 +; fadd d18, d0, d0 +; fadd d2, d0, d0 +; fadd d0, d0, d4 +; ldr q4, [sp] +; fadd d6, d4, d6 +; fadd d4, d8, d10 +; fadd d10, d12, d14 +; fadd d8, d1, d3 +; fadd d14, d5, d7 +; fadd d12, d9, d11 +; fadd d3, d13, d16 +; fadd d1, d15, d20 +; fadd d7, d22, d24 +; fadd d5, d26, d28 +; fadd d11, d30, d17 +; fadd d9, d19, d21 +; fadd d15, d23, d25 +; fadd d13, d27, d29 +; fadd d2, d18, d2 +; fadd d0, d0, d6 +; fadd d6, d4, d10 +; fadd d4, d8, d14 +; fadd d10, d12, d3 +; fadd d8, d1, d7 +; fadd d11, d5, d11 +; fadd d12, d9, d15 +; fadd d14, d13, d2 +; fadd d0, d0, d6 +; fadd d2, d4, d10 +; fadd d4, d8, d11 +; fadd d6, d12, d14 +; fadd d8, d0, d2 +; fadd d10, d4, d6 +; fadd d0, d8, d10 +; add sp, sp, #16 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %f2(i64) -> i64 { block0(v0: i64): @@ -135,14 +201,49 @@ block0(v0: i64): return v36 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: str x22, [sp, #-16]! -; nextln: stp x19, x20, [sp, #-16]! -; nextln: add x1, x0, x0 +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x19, x21, [sp, #-16]! +; block0: +; add x6, x0, x0 +; add x7, x0, x6 +; add x8, x0, x7 +; add x9, x0, x8 +; add x10, x0, x9 +; add x11, x0, x10 +; add x12, x0, x11 +; add x13, x0, x12 +; add x14, x0, x13 +; add x15, x0, x14 +; add x1, x0, x15 +; add x2, x0, x1 +; add x3, x0, x2 +; add x4, x0, x3 +; add x5, x0, x4 +; add x28, x0, x5 +; add x21, x0, x28 +; add x19, x0, x21 +; add x6, x0, x6 +; add x7, x7, x8 +; add x8, x9, x10 +; add x9, x11, x12 +; add x10, x13, x14 +; add x11, x15, x1 +; add x12, x2, x3 +; add x13, x4, x5 +; add x14, x28, x21 +; add x6, x19, x6 +; add x7, x7, x8 +; add x8, x9, x10 +; add x9, x11, x12 +; add x10, x13, x14 +; add x6, x6, x7 +; add x7, x8, x9 +; add x6, x10, x6 +; add x0, x7, x6 +; ldp x19, x21, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret -; check: add x0, x1, x0 -; nextln: ldp x19, x20, [sp], #16 -; nextln: ldr x22, [sp], #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/reduce.clif b/cranelift/filetests/filetests/isa/aarch64/reduce.clif index 5d6ffa1700..9f85ce9689 100644 --- a/cranelift/filetests/filetests/isa/aarch64/reduce.clif +++ b/cranelift/filetests/filetests/isa/aarch64/reduce.clif @@ -8,13 +8,8 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %ireduce_128_32(i128) -> i32 { block0(v0: i128): @@ -22,13 +17,8 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
1) -; Inst 0: ret -; }} +; block0: +; ret function %ireduce_128_16(i128) -> i16 { block0(v0: i128): @@ -36,13 +26,8 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %ireduce_128_8(i128) -> i8 { block0(v0: i128): @@ -50,11 +35,6 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif index 4b7d855574..cd66dd952c 100644 --- a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif @@ -7,13 +7,8 @@ block0(v0: r64): return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %f1(r64) -> b1 { block0(v0: r64): @@ -21,15 +16,10 @@ block0(v0: r64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs xzr, x0, #0 -; Inst 1: cset x0, eq -; Inst 2: ret -; }} +; block0: +; subs xzr, x0, #0 +; cset x0, eq +; ret function %f2(r64) -> b1 { block0(v0: r64): @@ -37,15 +27,10 @@ block0(v0: r64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: adds xzr, x0, #1 -; Inst 1: cset x0, eq -; Inst 2: ret -; }} +; block0: +; adds xzr, x0, #1 +; cset x0, eq +; ret function %f3() -> r64 { block0: @@ -53,14 +38,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #0 -; Inst 1: ret -; }} +; block0: +; movz x0, #0 +; ret function %f4(r64, r64) -> r64, r64, r64 { fn0 = %f(r64) -> b1 @@ -83,63 +63,38 @@ block3(v7: r64, v8: r64): return v7, v8, v9 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 3) -; (instruction range: 0 .. 18) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: stp x19, x20, [sp, #-16]! -; Inst 3: sub sp, sp, #32 -; Inst 4: mov x19, x0 -; Inst 5: mov x20, x1 -; Inst 6: mov x0, x19 -; Inst 7: ldr x1, 8 ; b 12 ; data TestCase { length: 1, ascii: [102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 8: stur x0, [sp, #8] -; Inst 9: stur x19, [sp, #16] -; Inst 10: stur x20, [sp, #24] -; (safepoint: slots [S0, S1, S2] with EmitState EmitState { virtual_sp_offset: 0, nominal_sp_to_fp: 0, stack_map: None, cur_srcloc: SourceLoc(4294967295) }) -; Inst 11: blr x1 -; Inst 12: ldur x19, [sp, #16] -; Inst 13: ldur x20, [sp, #24] -; Inst 14: mov x1, sp -; Inst 15: str x19, [x1] -; Inst 16: and w0, w0, #1 -; Inst 17: cbz x0, label1 ; b label3 -; Block 1: -; (original IR block: block1) -; (successor: Block 2) -; (instruction range: 18 .. 19) -; Inst 18: b label2 -; Block 2: -; (successor: Block 5) -; (instruction range: 19 .. 21) -; Inst 19: mov x0, x20 -; Inst 20: b label5 -; Block 3: -; (original IR block: block2) -; (successor: Block 4) -; (instruction range: 21 .. 22) -; Inst 21: b label4 -; Block 4: -; (successor: Block 5) -; (instruction range: 22 .. 
25) -; Inst 22: mov x0, x19 -; Inst 23: mov x19, x20 -; Inst 24: b label5 -; Block 5: -; (original IR block: block3) -; (instruction range: 25 .. 33) -; Inst 25: mov x1, sp -; Inst 26: ldr x1, [x1] -; Inst 27: mov x2, x1 -; Inst 28: mov x1, x19 -; Inst 29: add sp, sp, #32 -; Inst 30: ldp x19, x20, [sp], #16 -; Inst 31: ldp fp, lr, [sp], #16 -; Inst 32: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #32 +; block0: +; mov x4, x1 +; mov x2, x0 +; ldr x3, 8 ; b 12 ; data TestCase { length: 1, ascii: [102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; str x2, [sp, #8] +; str x4, [sp, #16] +; blr x3 +; ldr x2, [sp, #8] +; mov x9, sp +; mov x12, x2 +; str x12, [x9] +; and w7, w0, #1 +; cbz x7, label1 ; b label3 +; block1: +; b label2 +; block2: +; mov x1, x12 +; ldr x0, [sp, #16] +; b label5 +; block3: +; b label4 +; block4: +; mov x0, x12 +; ldr x1, [sp, #16] +; b label5 +; block5: +; mov x4, sp +; ldr x2, [x4] +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-op.clif b/cranelift/filetests/filetests/isa/aarch64/shift-op.clif index b2074fb658..f2400cc8df 100644 --- a/cranelift/filetests/filetests/isa/aarch64/shift-op.clif +++ b/cranelift/filetests/filetests/isa/aarch64/shift-op.clif @@ -10,14 +10,9 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, x0, LSL 3 -; Inst 1: ret -; }} +; block0: +; add x0, x0, x0, LSL 3 +; ret function %f(i32) -> i32 { block0(v0: i32): @@ -26,12 +21,7 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsl w0, w0, #21 -; Inst 1: ret -; }} +; block0: +; lsl w0, w0, #21 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif index e4b602beb8..8468f76ebd 100644 --- a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif @@ -12,37 +12,30 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
25) -; Inst 0: mov x4, x1 -; Inst 1: orr x1, xzr, #128 -; Inst 2: sub x1, x1, x2 -; Inst 3: lsr x5, x0, x2 -; Inst 4: lsr x3, x4, x2 -; Inst 5: orn w6, wzr, w2 -; Inst 6: lsl x7, x4, #1 -; Inst 7: lsl x6, x7, x6 -; Inst 8: orr x5, x5, x6 -; Inst 9: ands xzr, x2, #64 -; Inst 10: csel x2, x3, x5, ne -; Inst 11: csel x3, xzr, x3, ne -; Inst 12: lsl x5, x0, x1 -; Inst 13: lsl x4, x4, x1 -; Inst 14: orn w6, wzr, w1 -; Inst 15: lsr x0, x0, #1 -; Inst 16: lsr x0, x0, x6 -; Inst 17: orr x0, x4, x0 -; Inst 18: ands xzr, x1, #64 -; Inst 19: csel x1, xzr, x5, ne -; Inst 20: csel x0, x5, x0, ne -; Inst 21: orr x3, x3, x0 -; Inst 22: orr x0, x2, x1 -; Inst 23: mov x1, x3 -; Inst 24: ret -; }} +; block0: +; orr x10, xzr, #128 +; sub x12, x10, x2 +; lsr x14, x0, x2 +; lsr x3, x1, x2 +; orn w4, wzr, w2 +; lsl x5, x1, #1 +; lsl x6, x5, x4 +; orr x8, x14, x6 +; ands xzr, x2, #64 +; csel x11, x3, x8, ne +; csel x13, xzr, x3, ne +; lsl x15, x0, x12 +; lsl x1, x1, x12 +; orn w3, wzr, w12 +; lsr x5, x0, #1 +; lsr x7, x5, x3 +; orr x9, x1, x7 +; ands xzr, x12, #64 +; csel x12, xzr, x15, ne +; csel x14, x15, x9, ne +; orr x1, x13, x14 +; orr x0, x11, x12 +; ret function %f0(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -50,14 +43,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; ror x0, x0, x1 +; ret function %f1(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -65,14 +53,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; ror w0, w0, w1 +; ret function %f2(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -80,20 +63,15 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: uxth w0, w0 -; Inst 1: and w1, w1, #15 -; Inst 2: sub w2, w1, #16 -; Inst 3: sub w2, wzr, w2 -; Inst 4: lsr w1, w0, w1 -; Inst 5: lsl w0, w0, w2 -; Inst 6: orr w0, w0, w1 -; Inst 7: ret -; }} +; block0: +; uxth w5, w0 +; and w7, w1, #15 +; sub w9, w7, #16 +; sub w11, wzr, w9 +; lsr w13, w5, w7 +; lsl w15, w5, w11 +; orr w0, w15, w13 +; ret function %f3(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -101,20 +79,15 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: uxtb w0, w0 -; Inst 1: and w1, w1, #7 -; Inst 2: sub w2, w1, #8 -; Inst 3: sub w2, wzr, w2 -; Inst 4: lsr w1, w0, w1 -; Inst 5: lsl w0, w0, w2 -; Inst 6: orr w0, w0, w1 -; Inst 7: ret -; }} +; block0: +; uxtb w5, w0 +; and w7, w1, #7 +; sub w9, w7, #8 +; sub w11, wzr, w9 +; lsr w13, w5, w7 +; lsl w15, w5, w11 +; orr w0, w15, w13 +; ret function %i128_rotl(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -122,36 +95,30 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
24) -; Inst 0: mov x4, x0 -; Inst 1: orr x0, xzr, #128 -; Inst 2: sub x0, x0, x2 -; Inst 3: lsl x3, x4, x2 -; Inst 4: lsl x5, x1, x2 -; Inst 5: orn w6, wzr, w2 -; Inst 6: lsr x7, x4, #1 -; Inst 7: lsr x6, x7, x6 -; Inst 8: orr x5, x5, x6 -; Inst 9: ands xzr, x2, #64 -; Inst 10: csel x2, xzr, x3, ne -; Inst 11: csel x3, x3, x5, ne -; Inst 12: lsr x5, x4, x0 -; Inst 13: lsr x4, x1, x0 -; Inst 14: orn w6, wzr, w0 -; Inst 15: lsl x1, x1, #1 -; Inst 16: lsl x1, x1, x6 -; Inst 17: orr x1, x5, x1 -; Inst 18: ands xzr, x0, #64 -; Inst 19: csel x0, x4, x1, ne -; Inst 20: csel x1, xzr, x4, ne -; Inst 21: orr x0, x2, x0 -; Inst 22: orr x1, x3, x1 -; Inst 23: ret -; }} +; block0: +; orr x10, xzr, #128 +; sub x12, x10, x2 +; lsl x14, x0, x2 +; lsl x3, x1, x2 +; orn w4, wzr, w2 +; lsr x5, x0, #1 +; lsr x6, x5, x4 +; orr x8, x3, x6 +; ands xzr, x2, #64 +; csel x11, xzr, x14, ne +; csel x13, x14, x8, ne +; lsr x15, x0, x12 +; lsr x2, x1, x12 +; orn w3, wzr, w12 +; lsl x5, x1, #1 +; lsl x7, x5, x3 +; orr x9, x15, x7 +; ands xzr, x12, #64 +; csel x12, x2, x9, ne +; csel x14, xzr, x2, ne +; orr x0, x11, x12 +; orr x1, x13, x14 +; ret function %f4(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -159,15 +126,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sub x1, xzr, x1 -; Inst 1: ror x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; sub x5, xzr, x1 +; ror x0, x0, x5 +; ret function %f5(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -175,15 +137,10 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sub w1, wzr, w1 -; Inst 1: ror w0, w0, w1 -; Inst 2: ret -; }} +; block0: +; sub w5, wzr, w1 +; ror w0, w0, w5 +; ret function %f6(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -191,21 +148,16 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: sub w1, wzr, w1 -; Inst 1: uxth w0, w0 -; Inst 2: and w1, w1, #15 -; Inst 3: sub w2, w1, #16 -; Inst 4: sub w2, wzr, w2 -; Inst 5: lsr w1, w0, w1 -; Inst 6: lsl w0, w0, w2 -; Inst 7: orr w0, w0, w1 -; Inst 8: ret -; }} +; block0: +; sub w5, wzr, w1 +; uxth w7, w0 +; and w9, w5, #15 +; sub w11, w9, #16 +; sub w13, wzr, w11 +; lsr w15, w7, w9 +; lsl w1, w7, w13 +; orr w0, w1, w15 +; ret function %f7(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -213,21 +165,16 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: sub w1, wzr, w1 -; Inst 1: uxtb w0, w0 -; Inst 2: and w1, w1, #7 -; Inst 3: sub w2, w1, #8 -; Inst 4: sub w2, wzr, w2 -; Inst 5: lsr w1, w0, w1 -; Inst 6: lsl w0, w0, w2 -; Inst 7: orr w0, w0, w1 -; Inst 8: ret -; }} +; block0: +; sub w5, wzr, w1 +; uxtb w7, w0 +; and w9, w5, #7 +; sub w11, w9, #8 +; sub w13, wzr, w11 +; lsr w15, w7, w9 +; lsl w1, w7, w13 +; orr w0, w1, w15 +; ret function %f8(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -235,14 +182,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: lsr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; lsr x0, x0, x1 +; ret function %f9(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -250,14 +192,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsr w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; lsr w0, w0, w1 +; ret function %f10(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -265,16 +202,11 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxth w0, w0 -; Inst 1: and w1, w1, #15 -; Inst 2: lsr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; uxth w5, w0 +; and w7, w1, #15 +; lsr w0, w5, w7 +; ret function %f11(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -282,16 +214,11 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: and w1, w1, #7 -; Inst 2: lsr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; uxtb w5, w0 +; and w7, w1, #7 +; lsr w0, w5, w7 +; ret function %f12(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -299,14 +226,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsl x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; lsl x0, x0, x1 +; ret function %f13(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -314,14 +236,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsl w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; lsl w0, w0, w1 +; ret function %f14(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -329,15 +246,10 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: and w1, w1, #15 -; Inst 1: lsl w0, w0, w1 -; Inst 2: ret -; }} +; block0: +; and w5, w1, #15 +; lsl w0, w0, w5 +; ret function %f15(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -345,15 +257,10 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: and w1, w1, #7 -; Inst 1: lsl w0, w0, w1 -; Inst 2: ret -; }} +; block0: +; and w5, w1, #7 +; lsl w0, w0, w5 +; ret function %f16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -361,14 +268,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; asr x0, x0, x1 +; ret function %f17(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -376,14 +278,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; asr w0, w0, w1 +; ret function %f18(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -391,16 +288,11 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: sxth w0, w0 -; Inst 1: and w1, w1, #15 -; Inst 2: asr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; sxth w5, w0 +; and w7, w1, #15 +; asr w0, w5, w7 +; ret function %f19(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -408,16 +300,11 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtb w0, w0 -; Inst 1: and w1, w1, #7 -; Inst 2: asr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; sxtb w5, w0 +; and w7, w1, #7 +; asr w0, w5, w7 +; ret function %f20(i64) -> i64 { block0(v0: i64): @@ -426,14 +313,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; ror x0, x0, #17 +; ret function %f21(i64) -> i64 { block0(v0: i64): @@ -442,14 +324,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror x0, x0, #47 -; Inst 1: ret -; }} +; block0: +; ror x0, x0, #47 +; ret function %f22(i32) -> i32 { block0(v0: i32): @@ -458,14 +335,9 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror w0, w0, #15 -; Inst 1: ret -; }} +; block0: +; ror w0, w0, #15 +; ret function %f23(i16) -> i16 { block0(v0: i16): @@ -474,17 +346,12 @@ block0(v0: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: uxth w0, w0 -; Inst 1: lsr w1, w0, #6 -; Inst 2: lsl w0, w0, #10 -; Inst 3: orr w0, w0, w1 -; Inst 4: ret -; }} +; block0: +; uxth w3, w0 +; lsr w5, w3, #6 +; lsl w7, w3, #10 +; orr w0, w7, w5 +; ret function %f24(i8) -> i8 { block0(v0: i8): @@ -493,17 +360,12 @@ block0(v0: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: uxtb w0, w0 -; Inst 1: lsr w1, w0, #5 -; Inst 2: lsl w0, w0, #3 -; Inst 3: orr w0, w0, w1 -; Inst 4: ret -; }} +; block0: +; uxtb w3, w0 +; lsr w5, w3, #5 +; lsl w7, w3, #3 +; orr w0, w7, w5 +; ret function %f25(i64) -> i64 { block0(v0: i64): @@ -512,14 +374,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsr x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; lsr x0, x0, #17 +; ret function %f26(i64) -> i64 { block0(v0: i64): @@ -528,14 +385,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; asr x0, x0, #17 +; ret function %f27(i64) -> i64 { block0(v0: i64): @@ -544,12 +396,7 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: lsl x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; lsl x0, x0, #17 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif b/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif index b594c60eba..532cdb82d2 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif @@ -10,14 +10,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull v0.8h, v0.8b, v1.8b -; Inst 1: ret -; }} +; block0: +; smull v0.8h, v0.8b, v1.8b +; ret function %fn2(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -27,14 +22,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull2 v0.8h, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; smull2 v0.8h, v0.16b, v1.16b +; ret function %fn3(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -44,14 +34,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull v0.4s, v0.4h, v1.4h -; Inst 1: ret -; }} +; block0: +; smull v0.4s, v0.4h, v1.4h +; ret function %fn4(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -61,14 +46,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull2 v0.4s, v0.8h, v1.8h -; Inst 1: ret -; }} +; block0: +; smull2 v0.4s, v0.8h, v1.8h +; ret function %fn5(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -78,14 +58,9 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull v0.2d, v0.2s, v1.2s -; Inst 1: ret -; }} +; block0: +; smull v0.2d, v0.2s, v1.2s +; ret function %fn6(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -95,14 +70,9 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull2 v0.2d, v0.4s, v1.4s -; Inst 1: ret -; }} +; block0: +; smull2 v0.2d, v0.4s, v1.4s +; ret function %fn7(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -112,14 +82,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull v0.8h, v0.8b, v1.8b -; Inst 1: ret -; }} +; block0: +; umull v0.8h, v0.8b, v1.8b +; ret function %fn8(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -129,14 +94,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull2 v0.8h, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; umull2 v0.8h, v0.16b, v1.16b +; ret function %fn9(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -146,14 +106,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: umull v0.4s, v0.4h, v1.4h -; Inst 1: ret -; }} +; block0: +; umull v0.4s, v0.4h, v1.4h +; ret function %fn10(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -163,14 +118,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull2 v0.4s, v0.8h, v1.8h -; Inst 1: ret -; }} +; block0: +; umull2 v0.4s, v0.8h, v1.8h +; ret function %fn11(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -180,14 +130,9 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull v0.2d, v0.2s, v1.2s -; Inst 1: ret -; }} +; block0: +; umull v0.2d, v0.2s, v1.2s +; ret function %fn12(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -197,12 +142,7 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull2 v0.2d, v0.4s, v1.4s -; Inst 1: ret -; }} +; block0: +; umull2 v0.2d, v0.4s, v1.4s +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif index 7be0e9705c..d1d81c61f7 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif @@ -11,14 +11,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: saddlp v0.8h, v0.16b -; Inst 1: ret -; }} +; block0: +; saddlp v0.8h, v0.16b +; ret function %fn2(i8x16) -> i16x8 { block0(v0: i8x16): @@ -28,14 +23,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uaddlp v0.8h, v0.16b -; Inst 1: ret -; }} +; block0: +; uaddlp v0.8h, v0.16b +; ret function %fn3(i16x8) -> i32x4 { block0(v0: i16x8): @@ -45,14 +35,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: saddlp v0.4s, v0.8h -; Inst 1: ret -; }} +; block0: +; saddlp v0.4s, v0.8h +; ret function %fn4(i16x8) -> i32x4 { block0(v0: i16x8): @@ -62,14 +47,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uaddlp v0.4s, v0.8h -; Inst 1: ret -; }} +; block0: +; uaddlp v0.4s, v0.8h +; ret function %fn5(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -79,16 +59,11 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtl v0.8h, v0.8b -; Inst 1: sxtl2 v1.8h, v1.16b -; Inst 2: addp v0.8h, v0.8h, v1.8h -; Inst 3: ret -; }} +; block0: +; sxtl v4.8h, v0.8b +; sxtl2 v6.8h, v1.16b +; addp v0.8h, v4.8h, v6.8h +; ret function %fn6(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -98,16 +73,11 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: uxtl v0.8h, v0.8b -; Inst 1: uxtl2 v1.8h, v1.16b -; Inst 2: addp v0.8h, v0.8h, v1.8h -; Inst 3: ret -; }} +; block0: +; uxtl v4.8h, v0.8b +; uxtl2 v6.8h, v1.16b +; addp v0.8h, v4.8h, v6.8h +; ret function %fn7(i8x16) -> i16x8 { block0(v0: i8x16): @@ -117,16 +87,11 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtl v1.8h, v0.8b -; Inst 1: sxtl2 v0.8h, v0.16b -; Inst 2: addp v0.8h, v1.8h, v0.8h -; Inst 3: ret -; }} +; block0: +; uxtl v2.8h, v0.8b +; sxtl2 v4.8h, v0.16b +; addp v0.8h, v2.8h, v4.8h +; ret function %fn8(i8x16) -> i16x8 { block0(v0: i8x16): @@ -136,14 +101,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtl v1.8h, v0.8b -; Inst 1: uxtl2 v0.8h, v0.16b -; Inst 2: addp v0.8h, v1.8h, v0.8h -; Inst 3: ret -; }} +; block0: +; sxtl v2.8h, v0.8b +; uxtl2 v4.8h, v0.16b +; addp v0.8h, v2.8h, v4.8h +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif index 9514c79a1f..166d27b80b 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif @@ -9,16 +9,11 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movz x0, #1 -; Inst 1: movk x0, #1, LSL #48 -; Inst 2: dup v0.2d, x0 -; Inst 3: ret -; }} +; block0: +; movz x2, #1 +; movk x2, #1, LSL #48 +; dup v0.2d, x2 +; ret function %f2() -> i16x8 { block0: @@ -28,15 +23,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #42679 -; Inst 1: dup v0.8h, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #42679 +; dup v0.8h, w2 +; ret function %f3() -> b8x16 { block0: @@ -46,14 +36,9 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movi v0.16b, #255 -; Inst 1: ret -; }} +; block0: +; movi v0.16b, #255 +; ret function %f4(i32, i8x16, i8x16) -> i8x16 { block0(v0: i32, v1: i8x16, v2: i8x16): @@ -61,15 +46,10 @@ block0(v0: i32, v1: i8x16, v2: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs wzr, w0, wzr -; Inst 1: vcsel v0.16b, v0.16b, v1.16b, ne (if-then-else diamond) -; Inst 2: ret -; }} +; block0: +; subs wzr, w0, wzr +; vcsel v0.16b, v0.16b, v1.16b, ne (if-then-else diamond) +; ret function %f5(i64) -> i8x16 { block0(v0: i64): @@ -78,14 +58,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ld1r { v0.16b }, [x0] -; Inst 1: ret -; }} +; block0: +; ld1r { v0.16b }, [x0] +; ret function %f6(i64, i64) -> i8x16, i8x16 { block0(v0: i64, v1: i64): @@ -96,15 +71,10 @@ block0(v0: i64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: ld1r { v0.16b }, [x0] -; Inst 1: ld1r { v1.16b }, [x1] -; Inst 2: ret -; }} +; block0: +; ld1r { v0.16b }, [x0] +; ld1r { v1.16b }, [x1] +; ret function %f7(i64, i64) -> i8x16, i8x16 { block0(v0: i64, v1: i64): @@ -115,16 +85,11 @@ block0(v0: i64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: ldrb w0, [x0] -; Inst 1: ld1r { v0.16b }, [x1] -; Inst 2: dup v1.16b, w0 -; Inst 3: ret -; }} +; block0: +; ldrb w4, [x0] +; ld1r { v0.16b }, [x1] +; dup v1.16b, w4 +; ret function %f8(i64, i64) -> i8x16, i8x16 { block0(v0: i64, v1: i64): @@ -134,16 +99,11 @@ block0(v0: i64, v1: i64): return v3, v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: ldrb w0, [x0] -; Inst 1: dup v0.16b, w0 -; Inst 2: dup v1.16b, w0 -; Inst 3: ret -; }} +; block0: +; ldrb w4, [x0] +; dup v0.16b, w4 +; dup v1.16b, w4 +; ret function %f9() -> i32x2 { block0: @@ -152,15 +112,10 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movi v0.2d, #18374687579166474495 -; Inst 1: fmov d0, d0 -; Inst 2: ret -; }} +; block0: +; movi v0.2d, #18374687579166474495 +; fmov d0, d0 +; ret function %f10() -> i32x4 { block0: @@ -169,14 +124,9 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mvni v0.4s, #15, MSL #16 -; Inst 1: ret -; }} +; block0: +; mvni v0.4s, #15, MSL #16 +; ret function %f11() -> f32x4 { block0: @@ -185,12 +135,7 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov v0.4s, #1.3125 -; Inst 1: ret -; }} +; block0: +; fmov v0.4s, #1.3125 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif index f39438c883..894ed03775 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif @@ -9,16 +9,11 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movz x0, #1 -; Inst 1: movk x0, #1, LSL #48 -; Inst 2: fmov d0, x0 -; Inst 3: ret -; }} +; block0: +; movz x2, #1 +; movk x2, #1, LSL #48 +; fmov d0, x2 +; ret function %f2() -> i32x4 { block0: @@ -27,13 +22,8 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #42679 -; Inst 1: fmov s0, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #42679 +; fmov s0, w2 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif index 5ccabd8acf..993d63c3cc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif @@ -7,26 +7,16 @@ block0: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
1) -; Inst 0: ret -; }} +; block0: +; ret function %stack_limit_leaf_zero(i64 stack_limit) { block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %stack_limit_gv_leaf_zero(i64 vmctx) { gv0 = vmctx @@ -37,13 +27,8 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %stack_limit_call_zero(i64 stack_limit) { fn0 = %foo() @@ -52,20 +37,15 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: subs xzr, sp, x0, UXTX -; Inst 3: b.hs 8 ; udf -; Inst 4: ldr x0, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 5: blr x0 -; Inst 6: ldp fp, lr, [sp], #16 -; Inst 7: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; subs xzr, sp, x0, UXTX +; b.hs 8 ; udf +; block0: +; ldr x2, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x2 +; ldp fp, lr, [sp], #16 +; ret function %stack_limit_gv_call_zero(i64 vmctx) { gv0 = vmctx @@ -78,22 +58,17 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: ldur x16, [x0] -; Inst 3: ldur x16, [x16, #4] -; Inst 4: subs xzr, sp, x16, UXTX -; Inst 5: b.hs 8 ; udf -; Inst 6: ldr x0, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 7: blr x0 -; Inst 8: ldp fp, lr, [sp], #16 -; Inst 9: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; ldr x16, [x0] +; ldr x16, [x16, #4] +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; block0: +; ldr x2, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x2 +; ldp fp, lr, [sp], #16 +; ret function %stack_limit(i64 stack_limit) { ss0 = explicit_slot 168 @@ -101,21 +76,16 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: add x16, x0, #176 -; Inst 3: subs xzr, sp, x16, UXTX -; Inst 4: b.hs 8 ; udf -; Inst 5: sub sp, sp, #176 -; Inst 6: add sp, sp, #176 -; Inst 7: ldp fp, lr, [sp], #16 -; Inst 8: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; add x16, x0, #176 +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; sub sp, sp, #176 +; block0: +; add sp, sp, #176 +; ldp fp, lr, [sp], #16 +; ret function %huge_stack_limit(i64 stack_limit) { ss0 = explicit_slot 400000 @@ -123,29 +93,24 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 17) -; Inst 0: stp fp, lr, [sp, #-16]! 
-; Inst 1: mov fp, sp -; Inst 2: subs xzr, sp, x0, UXTX -; Inst 3: b.hs 8 ; udf -; Inst 4: movz w17, #6784 -; Inst 5: movk w17, #6, LSL #16 -; Inst 6: add x16, x0, x17, UXTX -; Inst 7: subs xzr, sp, x16, UXTX -; Inst 8: b.hs 8 ; udf -; Inst 9: movz w16, #6784 -; Inst 10: movk w16, #6, LSL #16 -; Inst 11: sub sp, sp, x16, UXTX -; Inst 12: movz w16, #6784 -; Inst 13: movk w16, #6, LSL #16 -; Inst 14: add sp, sp, x16, UXTX -; Inst 15: ldp fp, lr, [sp], #16 -; Inst 16: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; subs xzr, sp, x0, UXTX +; b.hs 8 ; udf +; movz w17, #6784 +; movk w17, #6, LSL #16 +; add x16, x0, x17, UXTX +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; movz w16, #6784 +; movk w16, #6, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; movz w16, #6784 +; movk w16, #6, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %limit_preamble(i64 vmctx) { gv0 = vmctx @@ -157,23 +122,18 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: ldur x16, [x0] -; Inst 3: ldur x16, [x16, #4] -; Inst 4: add x16, x16, #32 -; Inst 5: subs xzr, sp, x16, UXTX -; Inst 6: b.hs 8 ; udf -; Inst 7: sub sp, sp, #32 -; Inst 8: add sp, sp, #32 -; Inst 9: ldp fp, lr, [sp], #16 -; Inst 10: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; ldr x16, [x0] +; ldr x16, [x16, #4] +; add x16, x16, #32 +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; sub sp, sp, #32 +; block0: +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret function %limit_preamble_huge(i64 vmctx) { gv0 = vmctx @@ -185,31 +145,26 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 19) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: ldur x16, [x0] -; Inst 3: ldur x16, [x16, #4] -; Inst 4: subs xzr, sp, x16, UXTX -; Inst 5: b.hs 8 ; udf -; Inst 6: movz w17, #6784 -; Inst 7: movk w17, #6, LSL #16 -; Inst 8: add x16, x16, x17, UXTX -; Inst 9: subs xzr, sp, x16, UXTX -; Inst 10: b.hs 8 ; udf -; Inst 11: movz w16, #6784 -; Inst 12: movk w16, #6, LSL #16 -; Inst 13: sub sp, sp, x16, UXTX -; Inst 14: movz w16, #6784 -; Inst 15: movk w16, #6, LSL #16 -; Inst 16: add sp, sp, x16, UXTX -; Inst 17: ldp fp, lr, [sp], #16 -; Inst 18: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; ldr x16, [x0] +; ldr x16, [x16, #4] +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; movz w17, #6784 +; movk w17, #6, LSL #16 +; add x16, x16, x17, UXTX +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; movz w16, #6784 +; movk w16, #6, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; movz w16, #6784 +; movk w16, #6, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %limit_preamble_huge_offset(i64 vmctx) { gv0 = vmctx @@ -220,20 +175,15 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] -; Inst 3: add x16, x16, #32 -; Inst 4: subs xzr, sp, x16, UXTX -; Inst 5: b.hs 8 ; udf -; Inst 6: sub sp, sp, #32 -; Inst 7: add sp, sp, #32 -; Inst 8: ldp fp, lr, [sp], #16 -; Inst 9: ret -; }} +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] +; add x16, x16, #32 +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; sub sp, sp, #32 +; block0: +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/stack.clif b/cranelift/filetests/filetests/isa/aarch64/stack.clif index a816eeb4d6..a5ebd29a9e 100644 --- a/cranelift/filetests/filetests/isa/aarch64/stack.clif +++ b/cranelift/filetests/filetests/isa/aarch64/stack.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false target aarch64 @@ -10,14 +10,14 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x0, sp -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %stack_addr_big() -> i64 { ss0 = explicit_slot 100000 @@ -28,20 +28,18 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x0, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - - -; FIXME: don't use stack_addr legalization for stack_load and stack_store +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x0, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %stack_load_small() -> i64 { ss0 = explicit_slot 8 @@ -51,15 +49,15 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x0, sp -; nextln: ldr x0, [x0] -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; ldr x0, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %stack_load_big() -> i64 { ss0 = explicit_slot 100000 @@ -70,19 +68,19 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x0, sp -; nextln: ldr x0, [x0] -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x0, sp +; ldr x0, [x0] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %stack_store_small(i64) { ss0 = explicit_slot 8 @@ -92,15 +90,15 @@ block0(v0: i64): return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x1, sp -; nextln: str x0, [x1] -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x2, sp +; str x0, [x2] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %stack_store_big(i64) { ss0 = explicit_slot 100000 @@ -111,21 +109,20 @@ block0(v0: i64): return } -; check: stp fp, lr, [sp, #-16]! 
-; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x1, sp -; nextln: str x0, [x1] -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x2, sp +; str x0, [x2] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret -; Force a b1 to be spilled into a slot at an SP offset between 0x100 and -; 0x1fff, to exercise the scaled addressing mode. function %b1_spill_slot(b1) -> b1, i64 { ss0 = explicit_slot 1000 @@ -277,7 +274,161 @@ block0(v0: b1): return v0, v137 } - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; sub sp, sp, #1152 +; block0: +; str x0, [sp, #1000] +; movz x8, #2 +; add x11, x8, #1 +; str x11, [sp, #1136] +; movz x8, #4 +; add x12, x8, #3 +; str x12, [sp, #1128] +; movz x8, #6 +; add x13, x8, #5 +; str x13, [sp, #1120] +; movz x8, #8 +; add x14, x8, #7 +; str x14, [sp, #1112] +; movz x8, #10 +; add x15, x8, #9 +; str x15, [sp, #1104] +; movz x8, #12 +; add x1, x8, #11 +; str x1, [sp, #1096] +; movz x8, #14 +; add x2, x8, #13 +; str x2, [sp, #1088] +; movz x8, #16 +; add x3, x8, #15 +; str x3, [sp, #1080] +; movz x8, #18 +; add x4, x8, #17 +; str x4, [sp, #1072] +; movz x8, #20 +; add x5, x8, #19 +; str x5, [sp, #1064] +; movz x8, #22 +; add x6, x8, #21 +; str x6, [sp, #1056] +; movz x8, #24 +; add x7, x8, #23 +; str x7, [sp, #1048] +; movz x8, #26 +; add x8, x8, #25 +; str x8, [sp, #1040] +; movz x8, #28 +; add x9, x8, #27 +; str x9, [sp, #1032] +; movz x8, #30 +; add x26, x8, #29 +; str x26, [sp, #1024] +; movz x8, #32 +; add x27, x8, #31 +; str x27, [sp, #1016] +; movz x8, #34 +; add x28, x8, #33 +; movz x8, #36 +; add x21, x8, #35 +; str x21, [sp, #1008] +; movz x8, #38 +; add x21, x8, #37 +; movz x8, #30 +; add x19, x8, #39 +; movz x8, #32 +; add x20, x8, #31 +; movz x8, #34 +; add x22, x8, #33 +; movz x8, #36 +; add x23, x8, #35 +; movz x8, #38 +; add x24, x8, #37 +; movz x8, #30 +; add x25, x8, #39 +; movz x8, #32 +; add x0, x8, #31 +; movz x8, #34 +; add x10, x8, #33 +; movz x8, #36 +; add x11, x8, #35 +; movz x8, #38 +; add x12, x8, #37 +; movz x8, #30 +; add x13, x8, #39 +; movz x8, #32 +; add x14, x8, #31 +; movz x8, #34 +; add x15, x8, #33 +; movz x8, #36 +; add x1, x8, #35 +; movz x8, #38 +; add x2, x8, #37 +; ldr x3, [sp, #1136] +; add x3, x3, #39 +; ldr x5, [sp, #1120] +; ldr x4, [sp, #1128] +; add x4, x4, x5 +; ldr x5, [sp, #1104] +; ldr x8, [sp, #1112] +; add x5, x8, x5 +; ldr x6, [sp, #1088] +; ldr x7, [sp, #1096] +; add x6, x7, x6 +; ldr x7, [sp, #1072] +; ldr x8, [sp, #1080] +; add x7, x8, x7 +; ldr x9, [sp, #1056] +; ldr x8, [sp, #1064] +; add x8, x8, x9 +; ldr x9, [sp, #1040] +; ldr x26, [sp, #1048] +; add x9, x26, x9 +; ldr x26, [sp, #1024] +; ldr x27, [sp, #1032] +; add x26, x27, x26 +; ldr x27, [sp, #1016] +; add x27, x27, x28 +; ldr x28, [sp, #1008] +; add x28, x28, x21 +; add x21, x19, x20 +; add x19, x22, x23 +; add x25, x24, x25 +; add x10, x0, x10 +; add x11, x11, x12 +; add x12, x13, x14 +; add x13, x15, x1 +; add x14, x2, x3 +; add x15, x4, x5 +; add x0, x6, x7 +; add x9, x8, x9 +; add x1, x26, x27 +; add x2, x28, x21 +; add x3, x19, x25 +; add x10, x10, x11 +; add x11, x12, 
x13 +; add x12, x14, x15 +; add x9, x0, x9 +; add x13, x1, x2 +; add x10, x3, x10 +; add x11, x11, x12 +; add x9, x9, x13 +; add x10, x10, x11 +; add x1, x9, x10 +; ldr x0, [sp, #1000] +; add sp, sp, #1152 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_store(i128) { ss0 = explicit_slot 16 @@ -286,15 +437,16 @@ block0(v0: i128): stack_store.i128 v0, ss0 return } -; TODO: Codegen improvement opportunities: This should be just a stp x0, x1, [sp, #-16] -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x2, sp -; nextln: stp x0, x1, [x2] -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x4, sp +; stp x0, x1, [x4] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_store_inst_offset(i128) { ss0 = explicit_slot 16 @@ -304,15 +456,16 @@ block0(v0: i128): stack_store.i128 v0, ss1+16 return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #32 -; nextln: add x2, sp, #32 -; nextln: stp x0, x1, [x2] -; nextln: add sp, sp, #32 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #32 +; block0: +; add x4, sp, #32 +; stp x0, x1, [x4] +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_store_big(i128) { ss0 = explicit_slot 100000 @@ -322,20 +475,20 @@ block0(v0: i128): stack_store.i128 v0, ss0 return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x2, sp -; nextln: stp x0, x1, [x2] -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x4, sp +; stp x0, x1, [x4] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_load() -> i128 { ss0 = explicit_slot 16 @@ -344,20 +497,16 @@ block0: v0 = stack_load.i128 ss0 return v0 } -; TODO: Codegen improvement opportunities: This should be just a ldp x0, x1, [sp, #-16] -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x0, sp -; nextln: ldp x1, x0, [x0] -; nextln: mov x2, x0 -; nextln: mov x0, x1 -; nextln: mov x1, x2 -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; ldp x0, x1, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_load_inst_offset() -> i128 { ss0 = explicit_slot 16 @@ -367,19 +516,16 @@ block0: v0 = stack_load.i128 ss1+16 return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #32 -; nextln: add x0, sp, #32 -; nextln: ldp x1, x0, [x0] -; nextln: mov x2, x0 -; nextln: mov x0, x1 -; nextln: mov x1, x2 -; nextln: add sp, sp, #32 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; sub sp, sp, #32 +; block0: +; add x0, sp, #32 +; ldp x0, x1, [x0] +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_load_big() -> i128 { ss0 = explicit_slot 100000 @@ -389,18 +535,18 @@ block0: v0 = stack_load.i128 ss0 return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x0, sp -; nextln: ldp x1, x0, [x0] -; nextln: mov x2, x0 -; nextln: mov x0, x1 -; nextln: mov x1, x2 -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x0, sp +; ldp x0, x1, [x0] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif b/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif index c1c695d9ae..b9eecef1bd 100644 --- a/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif +++ b/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif @@ -10,12 +10,7 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldr x0, 8 ; b 12 ; data TestCase { length: 9, ascii: [109, 121, 95, 103, 108, 111, 98, 97, 108, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 1: ret -; }} +; block0: +; ldr x0, 8 ; b 12 ; data TestCase { length: 9, ascii: [109, 121, 95, 103, 108, 111, 98, 97, 108, 0, 0, 0, 0, 0, 0, 0] } + 0 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif index fa81e520db..7cc42b191b 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif @@ -10,28 +10,23 @@ block0(v0: i32): return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 18) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x19, [sp, #-16]! -; Inst 3: stp d14, d15, [sp, #-16]! -; Inst 4: stp d12, d13, [sp, #-16]! -; Inst 5: stp d10, d11, [sp, #-16]! -; Inst 6: stp d8, d9, [sp, #-16]! -; Inst 7: mov x19, x0 -; Inst 8: elf_tls_get_addr u1:0 -; Inst 9: mov x1, x0 -; Inst 10: mov x0, x19 -; Inst 11: ldp d8, d9, [sp], #16 -; Inst 12: ldp d10, d11, [sp], #16 -; Inst 13: ldp d12, d13, [sp], #16 -; Inst 14: ldp d14, d15, [sp], #16 -; Inst 15: ldr x19, [sp], #16 -; Inst 16: ldp fp, lr, [sp], #16 -; Inst 17: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x25, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! 
+; block0: +; mov x25, x0 +; elf_tls_get_addr u1:0 +; mov x1, x0 +; mov x0, x25 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldr x25, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/traps.clif b/cranelift/filetests/filetests/isa/aarch64/traps.clif index 34c59f4678..e01b3e77cb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/traps.clif +++ b/cranelift/filetests/filetests/isa/aarch64/traps.clif @@ -7,13 +7,8 @@ block0: trap user0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: udf -; }} +; block0: +; udf function %g(i64) { block0(v0: i64): @@ -23,15 +18,10 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs xzr, x0, #42 -; Inst 1: b.ne 8 ; udf -; Inst 2: ret -; }} +; block0: +; subs xzr, x0, #42 +; b.ne 8 ; udf +; ret function %h() { block0: @@ -39,12 +29,7 @@ block0: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: brk #0 -; Inst 1: ret -; }} +; block0: +; brk #0 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif index 46b08b60b5..a13f20c555 100644 --- a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif +++ b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif @@ -8,14 +8,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; uxtb w0, w0 +; ret function %f_u_8_32(i8) -> i32 { block0(v0: i8): @@ -23,14 +18,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; uxtb w0, w0 +; ret function %f_u_8_16(i8) -> i16 { block0(v0: i8): @@ -38,14 +28,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; uxtb w0, w0 +; ret function %f_s_8_64(i8) -> i64 { block0(v0: i8): @@ -53,14 +38,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtb x0, w0 -; Inst 1: ret -; }} +; block0: +; sxtb x0, w0 +; ret function %f_s_8_32(i8) -> i32 { block0(v0: i8): @@ -68,14 +48,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; sxtb w0, w0 +; ret function %f_s_8_16(i8) -> i16 { block0(v0: i8): @@ -83,14 +58,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; sxtb w0, w0 +; ret function %f_u_16_64(i16) -> i64 { block0(v0: i16): @@ -98,14 +68,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: uxth w0, w0 -; Inst 1: ret -; }} +; block0: +; uxth w0, w0 +; ret function %f_u_16_32(i16) -> i32 { block0(v0: i16): @@ -113,14 +78,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxth w0, w0 -; Inst 1: ret -; }} +; block0: +; uxth w0, w0 +; ret function %f_s_16_64(i16) -> i64 { block0(v0: i16): @@ -128,14 +88,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxth x0, w0 -; Inst 1: ret -; }} +; block0: +; sxth x0, w0 +; ret function %f_s_16_32(i16) -> i32 { block0(v0: i16): @@ -143,14 +98,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxth w0, w0 -; Inst 1: ret -; }} +; block0: +; sxth w0, w0 +; ret function %f_u_32_64(i32) -> i64 { block0(v0: i32): @@ -158,14 +108,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mov w0, w0 -; Inst 1: ret -; }} +; block0: +; mov w0, w0 +; ret function %f_s_32_64(i32) -> i64 { block0(v0: i32): @@ -173,12 +118,7 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtw x0, w0 -; Inst 1: ret -; }} +; block0: +; sxtw x0, w0 +; ret diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif index 98883f6ae7..945e251371 100644 --- a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,8 +11,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: agr %r2, %r3 -; nextln: br %r14 +; block0: +; agr %r2, %r3 +; br %r14 function %iadd_i64_ext32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -21,8 +22,9 @@ block0(v0: i64, v1: i32): return v3 } -; check: agfr %r2, %r3 -; nextln: br %r14 +; block0: +; agfr %r2, %r3 +; br %r14 function %iadd_i64_imm16(i64) -> i64 { block0(v0: i64): @@ -31,8 +33,9 @@ block0(v0: i64): return v2 } -; check: aghi %r2, 1 -; nextln: br %r14 +; block0: +; aghi %r2, 1 +; br %r14 function %iadd_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -41,8 +44,9 @@ block0(v0: i64): return v2 } -; check: agfi %r2, 32768 -; nextln: br %r14 +; block0: +; agfi %r2, 32768 +; br %r14 function %iadd_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -51,8 +55,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: ag %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ag %r2, 0(%r3) +; br %r14 function %iadd_i64_mem_ext16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -61,8 +66,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: agh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; agh %r2, 0(%r3) +; br %r14 function %iadd_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -71,8 +77,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: agf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; agf %r2, 0(%r3) +; br %r14 function %iadd_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -80,8 +87,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: ar %r2, %r3 -; nextln: br %r14 +; block0: +; ar %r2, %r3 +; br %r14 function %iadd_i32_imm16(i32) -> i32 { block0(v0: 
i32): @@ -90,8 +98,9 @@ block0(v0: i32): return v2 } -; check: ahi %r2, 1 -; nextln: br %r14 +; block0: +; ahi %r2, 1 +; br %r14 function %iadd_i32_imm(i32) -> i32 { block0(v0: i32): @@ -100,8 +109,9 @@ block0(v0: i32): return v2 } -; check: afi %r2, 32768 -; nextln: br %r14 +; block0: +; afi %r2, 32768 +; br %r14 function %iadd_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -110,8 +120,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: a %r2, 0(%r3) -; nextln: br %r14 +; block0: +; a %r2, 0(%r3) +; br %r14 function %iadd_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -120,8 +131,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ay %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; ay %r2, 4096(%r3) +; br %r14 function %iadd_i32_mem_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -130,8 +142,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ah %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ah %r2, 0(%r3) +; br %r14 function %iadd_i32_memoff_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -140,8 +153,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ahy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; ahy %r2, 4096(%r3) +; br %r14 function %iadd_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -149,8 +163,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: ar %r2, %r3 -; nextln: br %r14 +; block0: +; ar %r2, %r3 +; br %r14 function %iadd_i16_imm(i16) -> i16 { block0(v0: i16): @@ -159,8 +174,9 @@ block0(v0: i16): return v2 } -; check: ahi %r2, 1 -; nextln: br %r14 +; block0: +; ahi %r2, 1 +; br %r14 function %iadd_i16_mem(i16, i64) -> i16 { block0(v0: i16, v1: i64): @@ -169,8 +185,9 @@ block0(v0: i16, v1: i64): return v3 } -; check: ah %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ah %r2, 0(%r3) +; br %r14 function %iadd_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -178,8 +195,9 @@ block0(v0: i8, v1: i8): return v2 } -; check: ar %r2, %r3 -; nextln: br %r14 +; block0: +; ar %r2, %r3 +; br %r14 function %iadd_i8_imm(i8) -> i8 { block0(v0: i8): @@ -188,8 +206,9 @@ block0(v0: i8): return v2 } -; check: ahi %r2, 1 -; nextln: br %r14 +; block0: +; ahi %r2, 1 +; br %r14 function %iadd_i8_mem(i8, i64) -> i8 { block0(v0: i8, v1: i64): @@ -198,13 +217,10 @@ block0(v0: i8, v1: i64): return v3 } -; check: llc %r3, 0(%r3) -; nextln: ar %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IADD_IFCOUT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llc %r4, 0(%r3) +; ar %r2, %r4 +; br %r14 function %iadd_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -212,8 +228,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: algr %r2, %r3 -; nextln: br %r14 +; block0: +; algr %r2, %r3 +; br %r14 function %iadd_i64_ext32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -222,8 +239,9 @@ block0(v0: i64, v1: i32): return v3 } -; check: algfr %r2, %r3 -; nextln: br %r14 +; block0: +; algfr %r2, %r3 +; br %r14 function %iadd_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -232,8 +250,9 @@ block0(v0: i64): return v2 } -; check: algfi %r2, 32768 -; nextln: br %r14 +; block0: +; algfi %r2, 32768 +; br %r14 function %iadd_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -242,8 +261,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: alg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; alg %r2, 0(%r3) +; br %r14 function %iadd_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -252,8 +272,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: algf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; algf %r2, 0(%r3) +; br %r14 function 
%iadd_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -261,8 +282,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: alr %r2, %r3 -; nextln: br %r14 +; block0: +; alr %r2, %r3 +; br %r14 function %iadd_i32_imm(i32) -> i32 { block0(v0: i32): @@ -271,8 +293,9 @@ block0(v0: i32): return v2 } -; check: alfi %r2, 32768 -; nextln: br %r14 +; block0: +; alfi %r2, 32768 +; br %r14 function %iadd_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -281,8 +304,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: al %r2, 0(%r3) -; nextln: br %r14 +; block0: +; al %r2, 0(%r3) +; br %r14 function %iadd_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -291,12 +315,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: aly %r2, 4096(%r3) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ISUB -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; aly %r2, 4096(%r3) +; br %r14 function %isub_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -304,8 +325,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: sgr %r2, %r3 -; nextln: br %r14 +; block0: +; sgr %r2, %r3 +; br %r14 function %isub_i64_ext32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -314,8 +336,9 @@ block0(v0: i64, v1: i32): return v3 } -; check: sgfr %r2, %r3 -; nextln: br %r14 +; block0: +; sgfr %r2, %r3 +; br %r14 function %isub_i64_imm16(i64) -> i64 { block0(v0: i64): @@ -324,8 +347,9 @@ block0(v0: i64): return v2 } -; check: aghi %r2, -1 -; nextln: br %r14 +; block0: +; aghi %r2, -1 +; br %r14 function %isub_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -334,8 +358,9 @@ block0(v0: i64): return v2 } -; check: agfi %r2, -32769 -; nextln: br %r14 +; block0: +; agfi %r2, -32769 +; br %r14 function %isub_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -344,8 +369,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: sg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sg %r2, 0(%r3) +; br %r14 function %isub_i64_mem_ext16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -354,8 +380,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: sgh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sgh %r2, 0(%r3) +; br %r14 function %isub_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -364,8 +391,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: sgf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sgf %r2, 0(%r3) +; br %r14 function %isub_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -373,8 +401,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: sr %r2, %r3 -; nextln: br %r14 +; block0: +; sr %r2, %r3 +; br %r14 function %isub_i32_imm16(i32) -> i32 { block0(v0: i32): @@ -383,8 +412,9 @@ block0(v0: i32): return v2 } -; check: ahi %r2, -1 -; nextln: br %r14 +; block0: +; ahi %r2, -1 +; br %r14 function %isub_i32_imm(i32) -> i32 { block0(v0: i32): @@ -393,8 +423,9 @@ block0(v0: i32): return v2 } -; check: afi %r2, -32769 -; nextln: br %r14 +; block0: +; afi %r2, -32769 +; br %r14 function %isub_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -403,8 +434,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: s %r2, 0(%r3) -; nextln: br %r14 +; block0: +; s %r2, 0(%r3) +; br %r14 function %isub_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -413,8 +445,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: sy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; sy %r2, 4096(%r3) +; br %r14 function %isub_i32_mem_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -423,8 +456,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: sh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sh %r2, 0(%r3) 
+; br %r14 function %isub_i32_memoff_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -433,8 +467,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: shy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; shy %r2, 4096(%r3) +; br %r14 function %isub_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -442,8 +477,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: sr %r2, %r3 -; nextln: br %r14 +; block0: +; sr %r2, %r3 +; br %r14 function %isub_i16_imm(i16) -> i16 { block0(v0: i16): @@ -452,8 +488,9 @@ block0(v0: i16): return v2 } -; check: ahi %r2, -1 -; nextln: br %r14 +; block0: +; ahi %r2, -1 +; br %r14 function %isub_i16_mem(i16, i64) -> i16 { block0(v0: i16, v1: i64): @@ -462,8 +499,9 @@ block0(v0: i16, v1: i64): return v3 } -; check: sh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sh %r2, 0(%r3) +; br %r14 function %isub_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -471,8 +509,9 @@ block0(v0: i8, v1: i8): return v2 } -; check: sr %r2, %r3 -; nextln: br %r14 +; block0: +; sr %r2, %r3 +; br %r14 function %isub_i8_imm(i8) -> i8 { block0(v0: i8): @@ -481,8 +520,9 @@ block0(v0: i8): return v2 } -; check: ahi %r2, -1 -; nextln: br %r14 +; block0: +; ahi %r2, -1 +; br %r14 function %isub_i8_mem(i8, i64) -> i8 { block0(v0: i8, v1: i64): @@ -491,13 +531,10 @@ block0(v0: i8, v1: i64): return v3 } -; check: llc %r3, 0(%r3) -; nextln: sr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IABS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llc %r4, 0(%r3) +; sr %r2, %r4 +; br %r14 function %iabs_i64(i64) -> i64 { block0(v0: i64): @@ -505,8 +542,9 @@ block0(v0: i64): return v1 } -; check: lpgr %r2, %r2 -; nextln: br %r14 +; block0: +; lpgr %r2, %r2 +; br %r14 function %iabs_i64_ext32(i32) -> i64 { block0(v0: i32): @@ -515,8 +553,9 @@ block0(v0: i32): return v2 } -; check: lpgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lpgfr %r2, %r2 +; br %r14 function %iabs_i32(i32) -> i32 { block0(v0: i32): @@ -524,8 +563,9 @@ block0(v0: i32): return v1 } -; check: lpr %r2, %r2 -; nextln: br %r14 +; block0: +; lpr %r2, %r2 +; br %r14 function %iabs_i16(i16) -> i16 { block0(v0: i16): @@ -533,9 +573,10 @@ block0(v0: i16): return v1 } -; check: lhr %r2, %r2 -; nextln: lpr %r2, %r2 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; lpr %r2, %r5 +; br %r14 function %iabs_i8(i8) -> i8 { block0(v0: i8): @@ -543,15 +584,10 @@ block0(v0: i8): return v1 } -; check: lbr %r2, %r2 -; nextln: lpr %r2, %r2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; INEG -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -; FIXME: neg-abs combination not yet supported +; block0: +; lbr %r5, %r2 +; lpr %r2, %r5 +; br %r14 function %ineg_i64(i64) -> i64 { block0(v0: i64): @@ -559,8 +595,9 @@ block0(v0: i64): return v1 } -; check: lcgr %r2, %r2 -; nextln: br %r14 +; block0: +; lcgr %r2, %r2 +; br %r14 function %ineg_i64_ext32(i32) -> i64 { block0(v0: i32): @@ -569,8 +606,9 @@ block0(v0: i32): return v2 } -; check: lcgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lcgfr %r2, %r2 +; br %r14 function %ineg_i32(i32) -> i32 { block0(v0: i32): @@ -578,8 +616,9 @@ block0(v0: i32): return v1 } -; check: lcr %r2, %r2 -; nextln: br %r14 +; block0: +; lcr %r2, %r2 +; br %r14 function %ineg_i16(i16) -> i16 { block0(v0: i16): @@ -587,8 +626,9 @@ block0(v0: i16): return v1 } -; check: lcr %r2, %r2 -; nextln: br %r14 +; block0: +; lcr %r2, %r2 +; br %r14 function %ineg_i8(i8) -> i8 { block0(v0: i8): @@ -596,12 +636,9 @@ block0(v0: i8): return v1 } -; check: lcr %r2, %r2 -; nextln: br %r14 - 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IMUL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lcr %r2, %r2 +; br %r14 function %imul_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -609,8 +646,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: msgr %r2, %r3 -; nextln: br %r14 +; block0: +; msgr %r2, %r3 +; br %r14 function %imul_i64_imm16(i64) -> i64 { block0(v0: i64): @@ -619,8 +657,9 @@ block0(v0: i64): return v2 } -; check: mghi %r2, 3 -; nextln: br %r14 +; block0: +; mghi %r2, 3 +; br %r14 function %imul_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -629,8 +668,9 @@ block0(v0: i64): return v2 } -; check: msgfi %r2, 32769 -; nextln: br %r14 +; block0: +; msgfi %r2, 32769 +; br %r14 function %imul_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -639,8 +679,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: msg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; msg %r2, 0(%r3) +; br %r14 function %imul_i64_mem_ext16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -649,8 +690,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: mgh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; mgh %r2, 0(%r3) +; br %r14 function %imul_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -659,8 +701,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: msgf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; msgf %r2, 0(%r3) +; br %r14 function %imul_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -668,8 +711,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: msr %r2, %r3 -; nextln: br %r14 +; block0: +; msr %r2, %r3 +; br %r14 function %imul_i32_imm16(i32) -> i32 { block0(v0: i32): @@ -678,8 +722,9 @@ block0(v0: i32): return v2 } -; check: mhi %r2, 3 -; nextln: br %r14 +; block0: +; mhi %r2, 3 +; br %r14 function %imul_i32_imm32(i32) -> i32 { block0(v0: i32): @@ -688,8 +733,9 @@ block0(v0: i32): return v2 } -; check: msfi %r2, 32769 -; nextln: br %r14 +; block0: +; msfi %r2, 32769 +; br %r14 function %imul_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -698,8 +744,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ms %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ms %r2, 0(%r3) +; br %r14 function %imul_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -708,8 +755,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: msy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; msy %r2, 4096(%r3) +; br %r14 function %imul_i32_mem_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -718,8 +766,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: mh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; mh %r2, 0(%r3) +; br %r14 function %imul_i32_memoff_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -728,8 +777,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: mhy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; mhy %r2, 4096(%r3) +; br %r14 function %imul_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -737,8 +787,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: msr %r2, %r3 -; nextln: br %r14 +; block0: +; msr %r2, %r3 +; br %r14 function %imul_i16_imm(i16) -> i16 { block0(v0: i16): @@ -747,8 +798,9 @@ block0(v0: i16): return v2 } -; check: mhi %r2, 3 -; nextln: br %r14 +; block0: +; mhi %r2, 3 +; br %r14 function %imul_i16_mem(i16, i64) -> i16 { block0(v0: i16, v1: i64): @@ -757,8 +809,9 @@ block0(v0: i16, v1: i64): return v3 } -; check: mh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; mh %r2, 0(%r3) +; br %r14 function %imul_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -766,8 +819,9 @@ block0(v0: i8, v1: i8): return v2 } -; check: msr %r2, %r3 -; nextln: 
br %r14 +; block0: +; msr %r2, %r3 +; br %r14 function %imul_i8_imm(i8) -> i8 { block0(v0: i8): @@ -776,8 +830,9 @@ block0(v0: i8): return v2 } -; check: mhi %r2, 3 -; nextln: br %r14 +; block0: +; mhi %r2, 3 +; br %r14 function %imul_i8_mem(i8, i64) -> i8 { block0(v0: i8, v1: i64): @@ -786,13 +841,10 @@ block0(v0: i8, v1: i64): return v3 } -; check: llc %r3, 0(%r3) -; nextln: msr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UMULHI -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llc %r4, 0(%r3) +; msr %r2, %r4 +; br %r14 function %umulhi_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -800,10 +852,11 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r3 -; nextln: mlgr %r0, %r2 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lgr %r1, %r3 +; mlgr %r0, %r2 +; lgr %r2, %r0 +; br %r14 function %umulhi_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -811,11 +864,14 @@ block0(v0: i32, v1: i32): return v2 } -; check: llgfr %r2, %r2 -; nextln: llgfr %r3, %r3 -; nextln: msgr %r2, %r3 -; nextln: srlg %r2, %r2, 32 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; llgfr %r3, %r2 +; lgr %r2, %r4 +; llgfr %r5, %r2 +; msgr %r3, %r5 +; srlg %r2, %r3, 32 +; br %r14 function %umulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -823,11 +879,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: llhr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srlk %r2, %r2, 16 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; llhr %r3, %r2 +; lgr %r2, %r4 +; llhr %r5, %r2 +; msr %r3, %r5 +; srlk %r2, %r3, 16 +; br %r14 function %umulhi_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -835,15 +894,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: llcr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srlk %r2, %r2, 8 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SMULHI -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r4, %r3 +; llcr %r3, %r2 +; lgr %r2, %r4 +; llcr %r5, %r2 +; msr %r3, %r5 +; srlk %r2, %r3, 8 +; br %r14 function %smulhi_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -851,9 +909,10 @@ block0(v0: i64, v1: i64): return v2 } -; check: mgrk %r0, %r2, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; mgrk %r0, %r2, %r3 +; lgr %r2, %r0 +; br %r14 function %smulhi_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -861,11 +920,14 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r2, %r2 -; nextln: lgfr %r3, %r3 -; nextln: msgr %r2, %r3 -; nextln: srag %r2, %r2, 32 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; lgfr %r3, %r2 +; lgr %r2, %r4 +; lgfr %r5, %r2 +; msgr %r3, %r5 +; srag %r2, %r3, 32 +; br %r14 function %smulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -873,11 +935,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: lhr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srak %r2, %r2, 16 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; lhr %r3, %r2 +; lgr %r2, %r4 +; lhr %r5, %r2 +; msr %r3, %r5 +; srak %r2, %r3, 16 +; br %r14 function %smulhi_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -885,15 +950,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: lbr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srak %r2, %r2, 8 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r4, %r3 +; lbr %r3, %r2 +; lgr %r2, %r4 +; lbr %r5, %r2 +; msr %r3, %r5 +; srak %r2, %r3, 8 +; br %r14 function 
%sdiv_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -901,15 +965,16 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: llihf %r2, 2147483647 -; nextln: iilf %r2, 4294967295 -; nextln: xgr %r2, %r1 -; nextln: ngr %r2, %r3 -; nextln: cgite %r2, -1 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; llihf %r4, 2147483647 +; iilf %r4, 4294967295 +; xgr %r4, %r1 +; ngrk %r5, %r4, %r3 +; cgite %r5, -1 +; dsgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -918,11 +983,12 @@ block0(v0: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dsgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; lghi %r2, 2 +; dsgr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -930,14 +996,15 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: iilf %r2, 2147483647 -; nextln: xr %r2, %r1 -; nextln: nr %r2, %r3 -; nextln: cite %r2, -1 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; iilf %r4, 2147483647 +; xrk %r2, %r4, %r1 +; nrk %r4, %r2, %r3 +; cite %r4, -1 +; dsgfr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -946,11 +1013,12 @@ block0(v0: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -958,15 +1026,16 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: lhi %r3, 32767 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; lhi %r2, 32767 +; xrk %r5, %r2, %r1 +; nrk %r2, %r5, %r4 +; cite %r2, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -975,11 +1044,12 @@ block0(v0: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -987,15 +1057,16 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: lhi %r3, 127 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; lhi %r2, 127 +; xrk %r5, %r2, %r1 +; nrk %r2, %r5, %r4 +; cite %r2, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -1004,15 +1075,12 @@ block0(v0: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %udiv_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1020,11 +1088,12 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; 
block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -1033,12 +1102,13 @@ block0(v0: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dlgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; lghi %r3, 2 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1046,11 +1116,12 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -1059,12 +1130,13 @@ block0(v0: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -1072,12 +1144,13 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -1086,12 +1159,13 @@ block0(v0: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -1099,12 +1173,13 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -1113,16 +1188,13 @@ block0(v0: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %srem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1130,12 +1202,13 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: cghi %r3, -1 -; nextln: locghie %r1, 0 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; cghi %r3, -1 +; locghie %r1, 0 +; dsgr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1143,10 +1216,11 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; dsgfr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -1154,11 +1228,12 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; dsgfr %r0, %r4 +; 
lgr %r2, %r0 +; br %r14 function %srem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -1166,15 +1241,12 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; dsgfr %r0, %r4 +; lgr %r2, %r0 +; br %r14 function %urem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1182,11 +1254,12 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; dlgr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1194,11 +1267,12 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; dlr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -1206,12 +1280,13 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; check: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 function %urem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -1219,10 +1294,11 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; check: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif index 53eed240c6..624f0d1849 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,11 +11,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r2 -; nextln: lrvgr %r3, %r3 -; nextln: csg %r2, %r3, 0(%r4) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r2 +; lrvgr %r3, %r3 +; csg %r5, %r3, 0(%r4) +; lrvgr %r2, %r5 +; br %r14 function %atomic_cas_i32(i32, i32, i64) -> i32 { block0(v0: i32, v1: i32, v2: i64): @@ -23,11 +24,12 @@ block0(v0: i32, v1: i32, v2: i64): return v3 } -; check: lrvr %r2, %r2 -; nextln: lrvr %r3, %r3 -; nextln: cs %r2, %r3, 0(%r4) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r2 +; lrvr %r3, %r3 +; cs %r5, %r3, 0(%r4) +; lrvr %r2, %r5 +; br %r14 function %atomic_cas_i16(i64, i16, i16, i64) -> i16 { block0(v0: i64, v1: i16, v2: i16, v3: i64): @@ -35,15 +37,20 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): return v4 } -; check: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: lrvr %r3, %r3 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rxsbg %r1, %r3, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; stmg %r6, %r15, 48(%r15) +; block0: +; lgr %r6, %r3 +; sllk %r3, %r5, 3 +; nill %r5, 65532 +; 
lgr %r2, %r6 +; lrvr %r2, %r2 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r3) ; rxsbg %r1, %r2, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r3) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r3) +; lrvr %r2, %r2 +; lmg %r6, %r15, 48(%r15) +; br %r14 function %atomic_cas_i8(i64, i8, i8, i64) -> i8 { block0(v0: i64, v1: i8, v2: i8, v3: i64): @@ -51,13 +58,14 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): return v4 } -; check: stmg %r14, %r15, 112(%r15) -; nextln: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: lcr %r14, %r2 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r14) ; cs %r0, %r1, 0(%r5) ; jglh 0b -; nextln: rll %r2, %r0, 8(%r2) -; nextln: lmg %r14, %r15, 112(%r15) -; nextln: br %r14 +; stmg %r12, %r15, 96(%r15) +; block0: +; sllk %r2, %r5, 3 +; nill %r5, 65532 +; lcr %r12, %r2 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r12) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r2) +; lmg %r12, %r15, 96(%r15) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif index 5d9e79786f..a7d94fc46c 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,8 +11,9 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: csg %r2, %r3, 0(%r4) -; nextln: br %r14 +; block0: +; csg %r2, %r3, 0(%r4) +; br %r14 function %atomic_cas_i32(i32, i32, i64) -> i32 { block0(v0: i32, v1: i32, v2: i64): @@ -20,8 +21,9 @@ block0(v0: i32, v1: i32, v2: i64): return v3 } -; check: cs %r2, %r3, 0(%r4) -; nextln: br %r14 +; block0: +; cs %r2, %r3, 0(%r4) +; br %r14 function %atomic_cas_i16(i64, i16, i16, i64) -> i16 { block0(v0: i64, v1: i16, v2: i16, v3: i64): @@ -29,12 +31,13 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): return v4 } -; check: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 16(%r2) -; nextln: br %r14 +; block0: +; sllk %r2, %r5, 3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) +; br %r14 function %atomic_cas_i8(i64, i8, i8, i64) -> i8 { block0(v0: i64, v1: i8, v2: i8, v3: i64): @@ -42,13 +45,14 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): return v4 } -; check: stmg %r14, %r15, 112(%r15) -; nextln: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: lcr %r14, %r2 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r14) ; cs %r0, %r1, 0(%r5) ; jglh 0b -; nextln: rll %r2, %r0, 8(%r2) -; nextln: lmg %r14, %r15, 112(%r15) -; nextln: br %r14 +; stmg %r12, %r15, 96(%r15) +; block0: +; sllk %r2, %r5, 3 +; nill %r5, 65532 +; lcr %r12, %r2 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r12) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 
1: +; rll %r2, %r0, 8(%r2) +; lmg %r12, %r15, 96(%r15) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif index 5556176bbb..fa493bcdd0 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %atomic_load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lrvg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvg %r2, 0(%r2) +; br %r14 function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) +; br %r14 function %atomic_load_i32(i64) -> i32 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrv %r2, 0(%r2) +; br %r14 function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -38,8 +41,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; br %r14 function %atomic_load_i16(i64) -> i16 { block0(v0: i64): @@ -47,8 +51,9 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvh %r2, 0(%r2) +; br %r14 function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -58,8 +63,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; br %r14 function %atomic_load_i8(i64) -> i8 { block0(v0: i64): @@ -67,6 +73,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif index b361aaa4c7..673577633b 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %atomic_load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lg %r2, 0(%r2) +; br %r14 function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: lgrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lgrl %r2, %sym + 0 +; br %r14 function %atomic_load_i32(i64) -> i32 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: l %r2, 0(%r2) -; nextln: br %r14 +; block0: +; l %r2, 0(%r2) +; br %r14 function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -38,8 +41,9 @@ block0: return v1 } -; check: lrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lrl %r2, %sym + 0 +; br %r14 function %atomic_load_i16(i64) -> i16 { block0(v0: i64): @@ -47,8 +51,9 @@ block0(v0: i64): return v1 } -; check: llh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llh %r2, 0(%r2) +; br %r14 function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -58,8 +63,9 @@ block0: return v1 } -; check: llhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llhrl %r2, %sym + 0 +; br %r14 function %atomic_load_i8(i64) -> i8 { block0(v0: i64): @@ -67,6 +73,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: 
br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif index 28cf8be01e..b23455eee1 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x arch13 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -7,10 +7,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: nngrk %r1, %r0, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: nngrk %r1, %r0, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -18,10 +19,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: nnrk %r1, %r0, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: nnrk %r1, %r0, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -29,12 +31,13 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 16(%r2) -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r5) +; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -42,13 +45,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -56,11 +60,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lg %r0, 0(%r3) -; nextln: 0: nngrk %r1, %r0, %r2 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lg %r0, 0(%r3) +; 0: nngrk %r1, %r0, %r5 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -68,11 +73,12 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: nnrk %r1, %r0, %r2 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; l %r0, 0(%r3) +; 0: nnrk %r1, %r0, %r5 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function 
%atomic_rmw_nand_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -80,14 +86,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rnsbg %r1, %r2, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -95,11 +102,12 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif index 783c70a9cd..479e25e734 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,11 +11,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lg %r0, 0(%r3) -; nextln: 0: csg %r0, %r2, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lg %r0, 0(%r3) +; 0: csg %r0, %r5, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_xchg_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -23,11 +24,12 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: cs %r0, %r2, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; l %r0, 0(%r3) +; 0: cs %r0, %r5, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_xchg_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -35,14 +37,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; risbgn %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_xchg_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -50,18 +53,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; 
nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (ADD) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_add_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -69,10 +68,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; agr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; agr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_add_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -80,10 +80,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; ar %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; ar %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_add_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -91,14 +92,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; ar %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; ar %r1, %r2 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_add_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -106,18 +108,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (SUB) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; ar %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_sub_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -125,10 +124,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; sgr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; sgr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function 
%atomic_rmw_sub_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -136,10 +136,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; sr %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; sr %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_sub_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -147,14 +148,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; sr %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; sr %r1, %r2 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_sub_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -162,18 +164,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (AND) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; sr %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_and_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -181,10 +180,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lang %r2, %r2, 0(%r3) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lang %r3, %r5, 0(%r3) +; lrvgr %r2, %r3 +; br %r14 function %atomic_rmw_and_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -192,10 +192,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: lan %r2, %r2, 0(%r3) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; lan %r3, %r5, 0(%r3) +; lrvr %r2, %r3 +; br %r14 function %atomic_rmw_and_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -203,14 +204,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rnsbg %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_and_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -218,17 +220,14 @@ block0(v0: i64, v1: i64, v2: i8): return 
v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (OR) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_or_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -236,10 +235,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: laog %r2, %r2, 0(%r3) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; laog %r3, %r5, 0(%r3) +; lrvgr %r2, %r3 +; br %r14 function %atomic_rmw_or_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -247,10 +247,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: lao %r2, %r2, 0(%r3) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; lao %r3, %r5, 0(%r3) +; lrvr %r2, %r3 +; br %r14 function %atomic_rmw_or_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -258,14 +259,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rosbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rosbg %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_or_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -273,18 +275,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (XOR) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_xor_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -292,10 +290,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: laxg %r2, %r2, 0(%r3) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; laxg %r3, %r5, 0(%r3) +; lrvgr %r2, %r3 +; br %r14 function %atomic_rmw_xor_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -303,10 +302,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: lax %r2, %r2, 0(%r3) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; lax %r3, %r5, 0(%r3) +; lrvr %r2, %r3 +; br %r14 function %atomic_rmw_xor_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: 
i64, v2: i16): @@ -314,14 +314,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rxsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rxsbg %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_xor_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -329,17 +330,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (NAND) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -347,11 +345,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lg %r0, 0(%r3) -; nextln: 0: ngrk %r1, %r0, %r2 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lg %r0, 0(%r3) +; 0: ngrk %r1, %r0, %r5 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -359,11 +358,12 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: nrk %r1, %r0, %r2 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; l %r0, 0(%r3) +; 0: nrk %r1, %r0, %r5 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -371,14 +371,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rnsbg %r1, %r2, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -386,17 +387,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; 
xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (SMIN) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_smin_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -404,10 +402,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_smin_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -415,10 +414,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_smin_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -426,14 +426,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_smin_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -441,18 +442,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (SMAX) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -460,10 +458,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg 
%r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_smax_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -471,10 +470,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_smax_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -482,14 +482,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_smax_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -497,18 +498,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (UMIN) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -516,10 +514,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_umin_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -527,10 +526,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_umin_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -538,14 +538,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk 
%r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 0(%r2)
-; nextln: lrvr %r2, %r2
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 0(%r5)
+; lrvr %r2, %r2
+; br %r14

 function %atomic_rmw_umin_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -553,18 +554,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (UMAX)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -572,10 +570,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lrvgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lrvgr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -583,10 +582,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lrvr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lrvr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -594,14 +594,15 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 0(%r2)
-; nextln: lrvr %r2, %r2
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 0(%r5)
+; lrvr %r2, %r2
+; br %r14

 function %atomic_rmw_umax_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -609,12 +610,13 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif
index 13b3adb1cd..648845f374 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -11,10 +11,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_xchg_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -22,10 +23,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_xchg_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -33,12 +35,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_xchg_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -46,17 +49,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (ADD)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_add_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -64,8 +64,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: laag %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laag %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_add_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -73,8 +74,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: laa %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laa %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_add_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -82,13 +84,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; ar %r1, %r2 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_add_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -96,18 +99,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (SUB)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; ar %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_sub_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -115,9 +115,10 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: lcgr %r3, %r3
-; nextln: laag %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lcgr %r3, %r3
+; laag %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_sub_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -125,9 +126,10 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lcr %r3, %r3
-; nextln: laa %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lcr %r3, %r3
+; laa %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_sub_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -135,13 +137,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; sr %r1, %r2 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_sub_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -149,18 +152,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (AND)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; sr %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_and_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -168,8 +168,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: lang %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lang %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_and_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -177,8 +178,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lan %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lan %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_and_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -186,12 +188,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_and_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -199,17 +202,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (OR)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_or_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -217,8 +217,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: laog %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laog %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_or_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -226,8 +227,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lao %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lao %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_or_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -235,12 +237,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rosbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_or_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -248,17 +251,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (XOR)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_xor_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -266,8 +266,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: laxg %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laxg %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_xor_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -275,8 +276,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lax %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lax %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_xor_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -284,12 +286,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rxsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_xor_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -297,17 +300,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (NAND)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -315,10 +315,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: ngrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: ngrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -326,10 +327,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: nrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: nrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_nand_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -337,12 +339,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -350,17 +353,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (SMIN)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_smin_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -368,10 +368,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: cgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: cgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smin_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -379,10 +380,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: cr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: cr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smin_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -390,13 +392,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_smin_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -404,18 +407,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (SMAX)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -423,10 +423,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: cgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: cgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smax_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -434,10 +435,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: cr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: cr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smax_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -445,13 +447,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_smax_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -459,18 +462,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (UMIN)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -478,10 +478,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: clgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: clgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umin_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -489,10 +490,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: clr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: clr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umin_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -500,13 +502,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_umin_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -514,18 +517,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (UMAX)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -533,10 +533,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: clgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: clgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -544,10 +545,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: clr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: clr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -555,13 +557,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_umax_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -569,12 +572,13 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif
index 176a988f68..1f83d1e81e 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %atomic_store_i64(i64, i64) {
@@ -7,9 +7,10 @@ block0(v0: i64, v1: i64):
 return
 }

-; check: strvg %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strvg %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i64_sym(i64) {
 gv0 = symbol colocated %sym
@@ -19,9 +20,10 @@ block0(v0: i64):
 return
 }

-; check: larl %r1, %sym + 0 ; strvg %r2, 0(%r1)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; larl %r1, %sym + 0 ; strvg %r2, 0(%r1)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i64(i64) {
 block0(v0: i64):
@@ -30,10 +32,11 @@ block0(v0: i64):
 return
 }

-; check: lghi %r3, 12345
-; nextln: strvg %r3, 0(%r2)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; lghi %r4, 12345
+; strvg %r4, 0(%r2)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32(i32, i64) {
 block0(v0: i32, v1: i64):
@@ -41,9 +44,10 @@ block0(v0: i32, v1: i64):
 return
 }

-; check: strv %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strv %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32_sym(i32) {
 gv0 = symbol colocated %sym
@@ -53,9 +57,10 @@ block0(v0: i32):
 return
 }

-; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; larl %r1, %sym + 0 ; strv %r2, 0(%r1)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i32(i64) {
 block0(v0: i64):
@@ -64,10 +69,11 @@ block0(v0: i64):
 return
 }

-; check: lhi %r3, 12345
-; nextln: strv %r3, 0(%r2)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; lhi %r4, 12345
+; strv %r4, 0(%r2)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16(i16, i64) {
 block0(v0: i16, v1: i64):
@@ -75,9 +81,10 @@ block0(v0: i16, v1: i64):
 return
 }

-; check: strvh %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strvh %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16_sym(i16) {
 gv0 = symbol colocated %sym
@@ -87,9 +94,10 @@ block0(v0: i16):
 return
 }

-; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; larl %r1, %sym + 0 ; strvh %r2, 0(%r1)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i16(i64) {
 block0(v0: i64):
@@ -98,9 +106,10 @@ block0(v0: i64):
 return
 }

-; check: mvhhi 0(%r2), 14640
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvhhi 0(%r2), 14640
+; bcr 14, 0
+; br %r14

 function %atomic_store_i8(i8, i64) {
 block0(v0: i8, v1: i64):
@@ -108,9 +117,10 @@ block0(v0: i8, v1: i64):
 return
 }

-; check: stc %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stc %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i8(i64) {
 block0(v0: i64):
@@ -119,7 +129,8 @@ block0(v0: i64):
 return
 }

-; check: mvi 0(%r2), 123
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvi 0(%r2), 123
+; bcr 14, 0
+; br %r14

diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif
index c7cc4c1dab..f536779be3 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %atomic_store_i64(i64, i64) {
@@ -7,9 +7,10 @@ block0(v0: i64, v1: i64):
 return
 }

-; check: stg %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stg %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i64_sym(i64) {
 gv0 = symbol colocated %sym
@@ -19,9 +20,10 @@ block0(v0: i64):
 return
 }

-; check: stgrl %r2, %sym + 0
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stgrl %r2, %sym + 0
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i64(i64) {
 block0(v0: i64):
@@ -30,9 +32,10 @@ block0(v0: i64):
 return
 }

-; check: mvghi 0(%r2), 12345
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvghi 0(%r2), 12345
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32(i32, i64) {
 block0(v0: i32, v1: i64):
@@ -40,9 +43,10 @@ block0(v0: i32, v1: i64):
 return
 }

-; check: st %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; st %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32_sym(i32) {
 gv0 = symbol colocated %sym
@@ -52,9 +56,10 @@ block0(v0: i32):
 return
 }

-; check: strl %r2, %sym + 0
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strl %r2, %sym + 0
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i32(i64) {
 block0(v0: i64):
@@ -63,9 +68,10 @@ block0(v0: i64):
 return
 }

-; check: mvhi 0(%r2), 12345
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvhi 0(%r2), 12345
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16(i16, i64) {
 block0(v0: i16, v1: i64):
@@ -73,9 +79,10 @@ block0(v0: i16, v1: i64):
 return
 }

-; check: sth %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; sth %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16_sym(i16) {
 gv0 = symbol colocated %sym
@@ -85,9 +92,10 @@ block0(v0: i16):
 return
 }

-; check: sthrl %r2, %sym + 0
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; sthrl %r2, %sym + 0
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i16(i64) {
 block0(v0: i64):
@@ -96,9 +104,10 @@ block0(v0: i64):
 return
 }

-; check: mvhhi 0(%r2), 12345
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvhhi 0(%r2), 12345
+; bcr 14, 0
+; br %r14

 function %atomic_store_i8(i8, i64) {
 block0(v0: i8, v1: i64):
@@ -106,9 +115,10 @@ block0(v0: i8, v1: i64):
 return
 }

-; check: stc %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stc %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i8(i64) {
 block0(v0: i64):
@@ -117,7 +127,8 @@ block0(v0: i64):
 return
 }

-; check: mvi 0(%r2), 123
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvi 0(%r2), 123
+; bcr 14, 0
+; br %r14

diff --git a/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif b/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif
index 4350ab053d..e06b019347 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x arch13

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -11,8 +11,9 @@ block0(v0: i64):
 return v1
 }

-; check: popcnt %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; popcnt %r2, %r2, 8
+; br %r14

 function %popcnt_i32(i32) -> i32 {
 block0(v0: i32):
@@ -20,9 +21,10 @@ block0(v0: i32):
 return v1
 }

-; check: llgfr %r2, %r2
-; nextln: popcnt %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; llgfr %r5, %r2
+; popcnt %r2, %r5, 8
+; br %r14

 function %popcnt_i16(i16) -> i16 {
 block0(v0: i16):
@@ -30,9 +32,10 @@ block0(v0: i16):
 return v1
 }

-; check: llghr %r2, %r2
-; nextln: popcnt %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; llghr %r5, %r2
+; popcnt %r2, %r5, 8
+; br %r14

 function %popcnt_i8(i8) -> i8 {
 block0(v0: i8):
@@ -40,5 +43,7 @@ block0(v0: i8):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: br %r14
+; block0:
+; popcnt %r2, %r2
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif
index 8939a946af..444b5242e1 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitops.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -42,9 +42,10 @@ block0(v0: i64):
 return v1
 }

-; check: flogr %r0, %r2
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; flogr %r0, %r2
+; lgr %r2, %r0
+; br %r14

 function %clz_i32(i32) -> i32 {
 block0(v0: i32):
@@ -52,10 +53,11 @@ block0(v0: i32):
 return v1
 }

-; check: llgfr %r2, %r2
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -32
-; nextln: br %r14
+; block0:
+; llgfr %r5, %r2
+; flogr %r0, %r5
+; ahik %r2, %r0, -32
+; br %r14

 function %clz_i16(i16) -> i16 {
 block0(v0: i16):
@@ -63,10 +65,11 @@ block0(v0: i16):
 return v1
 }

-; check: llghr %r2, %r2
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -48
-; nextln: br %r14
+; block0:
+; llghr %r5, %r2
+; flogr %r0, %r5
+; ahik %r2, %r0, -48
+; br %r14

 function %clz_i8(i8) -> i8 {
 block0(v0: i8):
@@ -74,14 +77,11 @@ block0(v0: i8):
 return v1
 }

-; check: llgcr %r2, %r2
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -56
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; CLS
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llgcr %r5, %r2
+; flogr %r0, %r5
+; ahik %r2, %r0, -56
+; br %r14

 function %cls_i64(i64) -> i64 {
 block0(v0: i64):
@@ -89,11 +89,12 @@ block0(v0: i64):
 return v1
 }

-; check: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; srag %r5, %r2, 63
+; xgrk %r3, %r2, %r5
+; flogr %r0, %r3
+; lgr %r2, %r0
+; br %r14

 function %cls_i32(i32) -> i32 {
 block0(v0: i32):
@@ -101,12 +102,13 @@ block0(v0: i32):
 return v1
 }

-; check: lgfr %r2, %r2
-; nextln: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -32
-; nextln: br %r14
+; block0:
+; lgfr %r5, %r2
+; srag %r3, %r5, 63
+; xgr %r5, %r3
+; flogr %r0, %r5
+; ahik %r2, %r0, -32
+; br %r14

 function %cls_i16(i16) -> i16 {
 block0(v0: i16):
@@ -114,12 +116,13 @@ block0(v0: i16):
 return v1
 }

-; check: lghr %r2, %r2
-; nextln: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -48
-; nextln: br %r14
+; block0:
+; lghr %r5, %r2
+; srag %r3, %r5, 63
+; xgr %r5, %r3
+; flogr %r0, %r5
+; ahik %r2, %r0, -48
+; br %r14

 function %cls_i8(i8) -> i8 {
 block0(v0: i8):
@@ -127,16 +130,13 @@ block0(v0: i8):
 return v1
 }

-; check: lgbr %r2, %r2
-; nextln: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -56
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; CTZ
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; lgbr %r5, %r2
+; srag %r3, %r5, 63
+; xgr %r5, %r3
+; flogr %r0, %r5
+; ahik %r2, %r0, -56
+; br %r14

 function %ctz_i64(i64) -> i64 {
 block0(v0: i64):
@@ -144,13 +144,14 @@ block0(v0: i64):
 return v1
 }

-; check: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: locghie %r0, -1
-; nextln: lghi %r2, 63
-; nextln: sgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lcgr %r5, %r2
+; ngrk %r3, %r2, %r5
+; flogr %r0, %r3
+; locghie %r0, -1
+; lghi %r3, 63
+; sgrk %r2, %r3, %r0
+; br %r14

 function %ctz_i32(i32) -> i32 {
 block0(v0: i32):
@@ -158,13 +159,14 @@ block0(v0: i32):
 return v1
 }

-; check: oihl %r2, 1
-; nextln: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lhi %r2, 63
-; nextln: sr %r2, %r0
-; nextln: br %r14
+; block0:
+; oihl %r2, 1
+; lcgr %r4, %r2
+; ngr %r2, %r4
+; flogr %r0, %r2
+; lhi %r5, 63
+; srk %r2, %r5, %r0
+; br %r14

 function %ctz_i16(i16) -> i16 {
 block0(v0: i16):
@@ -172,13 +174,14 @@ block0(v0: i16):
 return v1
 }

-; check: oilh %r2, 1
-; nextln: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lhi %r2, 63
-; nextln: sr %r2, %r0
-; nextln: br %r14
+; block0:
+; oilh %r2, 1
+; lcgr %r4, %r2
+; ngr %r2, %r4
+; flogr %r0, %r2
+; lhi %r5, 63
+; srk %r2, %r5, %r0
+; br %r14

 function %ctz_i8(i8) -> i8 {
 block0(v0: i8):
@@ -186,17 +189,14 @@ block0(v0: i8):
 return v1
 }

-; check: oill %r2, 256
-; nextln: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lhi %r2, 63
-; nextln: sr %r2, %r0
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; POPCNT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; oill %r2, 256
+; lcgr %r4, %r2
+; ngr %r2, %r4
+; flogr %r0, %r2
+; lhi %r5, 63
+; srk %r2, %r5, %r0
+; br %r14

 function %popcnt_i64(i64) -> i64 {
 block0(v0: i64):
@@ -204,15 +204,16 @@ block0(v0: i64):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: sllg %r3, %r2, 32
-; nextln: agr %r2, %r3
-; nextln: sllg %r3, %r2, 16
-; nextln: agr %r2, %r3
-; nextln: sllg %r3, %r2, 8
-; nextln: agr %r2, %r3
-; nextln: srlg %r2, %r2, 56
-; nextln: br %r14
+; block0:
+; popcnt %r5, %r2
+; sllg %r3, %r5, 32
+; agr %r5, %r3
+; sllg %r3, %r5, 16
+; agr %r5, %r3
+; sllg %r3, %r5, 8
+; agr %r5, %r3
+; srlg %r2, %r5, 56
+; br %r14

 function %popcnt_i32(i32) -> i32 {
 block0(v0: i32):
@@ -220,13 +221,14 @@ block0(v0: i32):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: sllk %r3, %r2, 16
-; nextln: ar %r2, %r3
-; nextln: sllk %r3, %r2, 8
-; nextln: ar %r2, %r3
-; nextln: srlk %r2, %r2, 24
-; nextln: br %r14
+; block0:
+; popcnt %r5, %r2
+; sllk %r3, %r5, 16
+; ar %r5, %r3
+; sllk %r3, %r5, 8
+; ar %r5, %r3
+; srlk %r2, %r5, 24
+; br %r14

 function %popcnt_i16(i16) -> i16 {
 block0(v0: i16):
@@ -234,11 +236,12 @@ block0(v0: i16):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: sllk %r3, %r2, 8
-; nextln: ar %r2, %r3
-; nextln: srlk %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; popcnt %r5, %r2
+; sllk %r3, %r5, 8
+; ar %r5, %r3
+; srlk %r2, %r5, 8
+; br %r14

 function %popcnt_i8(i8) -> i8 {
 block0(v0: i8):
@@ -246,5 +249,7 @@ block0(v0: i8):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: br %r14
+; block0:
+; popcnt %r2, %r2
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif b/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif
index 5630fee0dc..f0298ce69c 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif
@@ -1,5 +1,5 @@
-test compile
+test compile precise-output
 target s390x arch13

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -12,8 +12,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: nngrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nngrk %r2, %r2, %r3
+; br %r14

 function %band_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -21,8 +22,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nnrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nnrk %r2, %r2, %r3
+; br %r14

 function %band_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -30,8 +32,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nnrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nnrk %r2, %r2, %r3
+; br %r14

 function %band_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -39,12 +42,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nnrk %r2, %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nnrk %r2, %r2, %r3
+; br %r14

 function %bor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -52,8 +52,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: nogrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nogrk %r2, %r2, %r3
+; br %r14

 function %bor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -61,8 +62,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nork %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r3
+; br %r14

 function %bor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -70,8 +72,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nork %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r3
+; br %r14

 function %bor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -79,12 +82,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nork %r2, %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BXOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nork %r2, %r2, %r3
+; br %r14

 function %bxor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -92,8 +92,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: nxgrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nxgrk %r2, %r2, %r3
+; br %r14

 function %bxor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -101,8 +102,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nxrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nxrk %r2, %r2, %r3
+; br %r14

 function %bxor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -110,8 +112,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nxrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nxrk %r2, %r2, %r3
+; br %r14

 function %bxor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -119,12 +122,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nxrk %r2, %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BNOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nxrk %r2, %r2, %r3
+; br %r14

 function %bnot_i64(i64) -> i64 {
 block0(v0: i64):
@@ -132,8 +132,9 @@ block0(v0: i64):
 return v1
 }

-; check: nogrk %r2, %r2, %r2
-; nextln: br %r14
+; block0:
+; nogrk %r2, %r2, %r2
+; br %r14

 function %bnot_i32(i32) -> i32 {
 block0(v0: i32):
@@ -141,8 +142,9 @@ block0(v0: i32):
 return v1
 }

-; check: nork %r2, %r2, %r2
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r2
+; br %r14

 function %bnot_i16(i16) -> i16 {
 block0(v0: i16):
@@ -150,8 +152,9 @@ block0(v0: i16):
 return v1
 }

-; check: nork %r2, %r2, %r2
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r2
+; br %r14

 function %bnot_i8(i8) -> i8 {
 block0(v0: i8):
@@ -159,12 +162,9 @@ block0(v0: i8):
 return v1
 }

-; check: nork %r2, %r2, %r2
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BITSELECT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nork %r2, %r2, %r2
+; br %r14

 function %bitselect_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -172,10 +172,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: ngr %r3, %r2
-; nextln: nngrk %r2, %r4, %r2
-; nextln: ogr %r2, %r3
-; nextln: br %r14
+; block0:
+; ngrk %r5, %r3, %r2
+; nngrk %r3, %r4, %r2
+; ogrk %r2, %r3, %r5
+; br %r14

 function %bitselect_i32(i32, i32, i32) -> i32 {
 block0(v0: i32, v1: i32, v2: i32):
@@ -183,10 +184,11 @@ block0(v0: i32, v1: i32, v2: i32):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nnrk %r2, %r4, %r2
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nnrk %r3, %r4, %r2
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i16(i16, i16, i16) -> i16 {
 block0(v0: i16, v1: i16, v2: i16):
@@ -194,10 +196,11 @@ block0(v0: i16, v1: i16, v2: i16):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nnrk %r2, %r4, %r2
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nnrk %r3, %r4, %r2
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i8(i8, i8, i8) -> i8 {
 block0(v0: i8, v1: i8, v2: i8):
@@ -205,8 +208,9 @@ block0(v0: i8, v1: i8, v2: i8):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nnrk %r2, %r4, %r2
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nnrk %r3, %r4, %r2
+; ork %r2, %r3, %r5
+; br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif
index 205c692300..3fe6cbfd2c 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif
@@ -1,5 +1,5 @@
-test compile
+test compile precise-output
 target s390x

 ; FIXME: add immediate operand versions

@@ -14,8 +14,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ngr %r2, %r3
-; nextln: br %r14
+; block0:
+; ngr %r2, %r3
+; br %r14

 function %band_i64_mem(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -24,8 +25,9 @@ block0(v0: i64, v1: i64):
 return v3
 }

-; check: ng %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; ng %r2, 0(%r3)
+; br %r14

 function %band_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -33,8 +35,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; br %r14

 function %band_i32_mem(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -43,8 +46,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: n %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; n %r2, 0(%r3)
+; br %r14

 function %band_i32_memoff(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -53,8 +57,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: ny %r2, 4096(%r3)
-; nextln: br %r14
+; block0:
+; ny %r2, 4096(%r3)
+; br %r14

 function %band_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -62,8 +67,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; br %r14

 function %band_i16_mem(i16, i64) -> i16 {
 block0(v0: i16, v1: i64):
@@ -72,9 +78,10 @@ block0(v0: i16, v1: i64):
 return v3
 }

-; check: llh %r3, 0(%r3)
-; nextln: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; llh %r4, 0(%r3)
+; nr %r2, %r4
+; br %r14

 function %band_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -82,8 +89,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; br %r14

 function %band_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
@@ -92,13 +100,10 @@ block0(v0: i8, v1: i64):
 return v3
 }

-; check: llc %r3, 0(%r3)
-; nextln: nr %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BOR
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llc %r4, 0(%r3)
+; nr %r2, %r4
+; br %r14

 function %bor_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -106,8 +111,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ogr %r2, %r3
-; nextln: br %r14
+; block0:
+; ogr %r2, %r3
+; br %r14

 function %bor_i64_mem(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -116,8 +122,9 @@ block0(v0: i64, v1: i64):
 return v3
 }

-; check: og %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; og %r2, 0(%r3)
+; br %r14

 function %bor_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -125,8 +132,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: or %r2, %r3
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; br %r14

 function %bor_i32_mem(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -135,8 +143,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: o %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; o %r2, 0(%r3)
+; br %r14

 function %bor_i32_memoff(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -145,8 +154,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: oy %r2, 4096(%r3)
-; nextln: br %r14
+; block0:
+; oy %r2, 4096(%r3)
+; br %r14

 function %bor_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -154,8 +164,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: or %r2, %r3
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; br %r14

 function %bor_i16_mem(i16, i64) -> i16 {
 block0(v0: i16, v1: i64):
@@ -164,9 +175,10 @@ block0(v0: i16, v1: i64):
 return v3
 }

-; check: llh %r3, 0(%r3)
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; llh %r4, 0(%r3)
+; or %r2, %r4
+; br %r14

 function %bor_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -174,8 +186,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: or %r2, %r3
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; br %r14

 function %bor_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
@@ -184,13 +197,10 @@ block0(v0: i8, v1: i64):
 return v3
 }

-; check: llc %r3, 0(%r3)
-; nextln: or %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BXOR
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llc %r4, 0(%r3)
+; or %r2, %r4
+; br %r14

 function %bxor_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -198,8 +208,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: xgr %r2, %r3
-; nextln: br %r14
+; block0:
+; xgr %r2, %r3
+; br %r14

 function %bxor_i64_mem(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -208,8 +219,9 @@ block0(v0: i64, v1: i64):
 return v3
 }

-; check: xg %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; xg %r2, 0(%r3)
+; br %r14

 function %bxor_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -217,8 +229,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; br %r14

 function %bxor_i32_mem(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -227,8 +240,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: x %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; x %r2, 0(%r3)
+; br %r14

 function %bxor_i32_memoff(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -237,8 +251,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: xy %r2, 4096(%r3)
-; nextln: br %r14
+; block0:
+; xy %r2, 4096(%r3)
+; br %r14

 function %bxor_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -246,8 +261,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; br %r14

 function %bxor_i16_mem(i16, i64) -> i16 {
 block0(v0: i16, v1: i64):
@@ -256,9 +272,10 @@ block0(v0: i16, v1: i64):
 return v3
 }

-; check: llh %r3, 0(%r3)
-; nextln: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; llh %r4, 0(%r3)
+; xr %r2, %r4
+; br %r14

 function %bxor_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -266,8 +283,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; br %r14

 function %bxor_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
@@ -276,13 +294,10 @@ block0(v0: i8, v1: i64):
 return v3
 }

-; check: llc %r3, 0(%r3)
-; nextln: xr %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BAND_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llc %r4, 0(%r3)
+; xr %r2, %r4
+; br %r14

 function %band_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -290,10 +305,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ngr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; ngr %r2, %r3
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %band_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -301,9 +317,10 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %band_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -311,9 +328,10 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %band_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -321,13 +339,10 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -335,10 +350,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ogr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; ogr %r2, %r3
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %bor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -346,9 +362,10 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: or %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -356,9 +373,10 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: or %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -366,13 +384,10 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: or %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BXOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; or %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bxor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -380,10 +395,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: xgr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xgr %r2, %r3
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %bxor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -391,9 +407,10 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bxor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -401,9 +418,10 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bxor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -411,13 +429,10 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BNOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; xr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bnot_i64(i64) -> i64 {
 block0(v0: i64):
@@ -425,9 +440,10 @@ block0(v0: i64):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %bnot_i32(i32) -> i32 {
 block0(v0: i32):
@@ -435,8 +451,9 @@ block0(v0: i32):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xilf %r2, 4294967295
+; br %r14

 function %bnot_i16(i16) -> i16 {
 block0(v0: i16):
@@ -444,8 +461,9 @@ block0(v0: i16):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xilf %r2, 4294967295
+; br %r14

 function %bnot_i8(i8) -> i8 {
 block0(v0: i8):
@@ -453,12 +471,9 @@ block0(v0: i8):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BITSELECT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; xilf %r2, 4294967295
+; br %r14

 function %bitselect_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -466,12 +481,13 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: ngr %r3, %r2
-; nextln: ngrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: ogr %r2, %r3
-; nextln: br %r14
+; block0:
+; ngrk %r5, %r3, %r2
+; ngrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; xihf %r3, 4294967295
+; ogrk %r2, %r3, %r5
+; br %r14

 function %bitselect_i32(i32, i32, i32) -> i32 {
 block0(v0: i32, v1: i32, v2: i32):
@@ -479,11 +495,12 @@ block0(v0: i32, v1: i32, v2: i32):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i16(i16, i16, i16) -> i16 {
 block0(v0: i16, v1: i16, v2: i16):
@@ -491,11 +508,12 @@ block0(v0: i16, v1: i16, v2: i16):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i8(i8, i8, i8) -> i8 {
 block0(v0: i8, v1: i8, v2: i8):
@@ -503,9 +521,10 @@ block0(v0: i8, v1: i8, v2: i8):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; ork %r2, %r3, %r5
+; br %r14

diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif
index 4fee8cf9f8..bd3648316b 100644
--- a/cranelift/filetests/filetests/isa/s390x/call.clif
+++ b/cranelift/filetests/filetests/isa/s390x/call.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -13,13 +13,14 @@ block0(v0: i64):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %call_uext(i32) -> i64 {
 fn0 = %g(i32 uext) -> i64
@@ -29,22 +30,24 @@ block0(v0: i32):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: llgfr %r2, %r2
-; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; llgfr %r2, %r2
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %ret_uext(i32) -> i32 uext {
 block0(v0: i32):
 return v0
 }

-; check: llgfr %r2, %r2
-; nextln: br %r14
+; block0:
+; llgfr %r2, %r2
+; br %r14

 function %call_uext(i32) -> i64 {
 fn0 = %g(i32 sext) -> i64
@@ -54,22 +57,24 @@ block0(v0: i32):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: lgfr %r2, %r2
-; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; lgfr %r2, %r2
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %ret_uext(i32) -> i32 sext {
 block0(v0: i32):
 return v0
 }

-; check: lgfr %r2, %r2
-; nextln: br %r14
+; block0:
+; lgfr %r2, %r2
+; br %r14

 function %call_colocated(i64) -> i64 {
 fn0 = colocated %g(i64) -> i64
@@ -79,12 +84,13 @@ block0(v0: i64):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: brasl %r14, %g
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; brasl %r14, %g
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %f2(i32) -> i64 {
 fn0 = %g(i32 uext) -> i64
@@ -94,9 +100,15 @@ block0(v0: i32):
 return v1
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; CALL_INDIRECT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; llgfr %r2, %r2
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %call_indirect(i64, i64) -> i64 {
 sig0 = (i64) -> i64
@@ -105,9 +117,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/condbr.clif b/cranelift/filetests/filetests/isa/s390x/condbr.clif
index 12b81b705c..9aa2bf4197 100644
--- a/cranelift/filetests/filetests/isa/s390x/condbr.clif
+++ b/cranelift/filetests/filetests/isa/s390x/condbr.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %f(i64, i64) -> b1 {
@@ -7,10 +7,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: clgr %r2, %r3
-; nextln: lhi %r2, 0
-; nextln: lochie %r2, 1
-; nextln: br %r14
+; block0:
+; clgr %r2, %r3
+; lhi %r2, 0
+; lochie %r2, 1
+; br %r14

 function %f(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -27,15 +28,15 @@ block2:
 return v5
 }

-; check: Block 0:
-; check: clgr %r2, %r3
-; nextln: jge label1 ; jg label2
-; check: Block 1:
-; check: lghi %r2, 1
-; nextln: br %r14
-; check: Block 2:
-; check: lghi %r2, 2
-; nextln: br %r14
+; block0:
+; clgr %r2, %r3
+; jge label1 ; jg label2
+; block1:
+; lghi %r2, 1
+; br %r14
+; block2:
+; lghi %r2, 2
+; br %r14

 function %f(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -48,15 +49,14 @@ block1:
 return v4
 }

-; FIXME: Should optimize away branches
+; block0:
+; clgr %r2, %r3
+; jge label1 ; jg label2
+; block1:
+; jg label3
+; block2:
+; jg label3
+; block3:
+; lghi %r2, 1
+; br %r14

-; check: Block 0:
-; check: clgr %r2, %r3
-; nextln: jge label1 ; jg label2
-; check: Block 1:
-; check: jg label3
-; check: Block 2:
-; check: jg label3
-; check: Block 3:
-; check: lghi %r2, 1
-; nextln: br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/condops.clif b/cranelift/filetests/filetests/isa/s390x/condops.clif
index aaf1c3ba54..5a73e0ae1e 100644
--- a/cranelift/filetests/filetests/isa/s390x/condops.clif
+++ b/cranelift/filetests/filetests/isa/s390x/condops.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %f(i8, i64, i64) -> i64 {
@@ -9,11 +9,12 @@ block0(v0: i8, v1: i64, v2: i64):
 return v5
 }

-; check: llcr %r2, %r2
-; nextln: clfi %r2, 42
-; nextln: locgre %r4, %r3
-; nextln: lgr %r2, %r4
-; nextln: br %r14
+; block0:
+; llcr %r5, %r2
+; clfi %r5, 42
+; lgr %r2, %r4
+; locgre %r2, %r3
+; br %r14

 function %g(b1, i8, i8) -> i8 {
 block0(v0: b1, v1: i8, v2: i8):
@@ -21,13 +22,12 @@ block0(v0: b1, v1: i8, v2: i8):
 return v3
 }

-; FIXME: optimize i8/i16 compares
-
-; check: llcr %r2, %r2
-; nextln: chi %r2, 0
-; nextln: locrlh %r4, %r3
-; nextln: lr %r2, %r4
-; nextln: br %r14
+; block0:
+; llcr %r5, %r2
+; chi %r5, 0
+; lgr %r2, %r4
+; locrlh %r2, %r3
+; br %r14

 function %i(i32, i8, i8) -> i8 {
 block0(v0: i32, v1: i8, v2: i8):
@@ -37,7 +37,9 @@ block0(v0: i32, v1: i8, v2: i8):
 return v5
 }

-; check: clfi %r2, 42
-; nextln: locre %r4, %r3
-; nextln: lr %r2, %r4
-; nextln: br %r14
+; block0:
+; clfi %r2, 42
+; lgr %r2, %r4
+; locre %r2, %r3
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/constants.clif b/cranelift/filetests/filetests/isa/s390x/constants.clif
index 96effdecde..9a9025873b 100644
--- a/cranelift/filetests/filetests/isa/s390x/constants.clif
+++ b/cranelift/filetests/filetests/isa/s390x/constants.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %f() -> b8 {
@@ -7,8 +7,9 @@ block0:
 return v0
 }

-; check: lhi %r2, 255
-; nextln: br %r14
+; block0:
+; lhi %r2, 255
+; br %r14

 function %f() -> b16 {
 block0:
@@ -16,8 +17,9 @@ block0:
 return v0
 }

-; check: lhi %r2, 0
-; nextln: br %r14
+; block0:
+; lhi %r2, 0
+; br %r14

 function %f() -> i64 {
 block0:
@@ -25,8 +27,9 @@ block0:
 return v0
 }

-; check: lghi %r2, 0
-; nextln: br %r14
+; block0:
+; lghi %r2, 0
+; br %r14

 function %f() -> i64 {
 block0:
@@ -34,8 +37,9 @@ block0:
 return v0
 }

-; check: lgfi %r2, 65535
-; nextln: br %r14
+; block0:
+; lgfi %r2, 65535
+; br %r14

 function %f() -> i64 {
 block0:
@@ -43,8 +47,9 @@ block0:
 return v0
 }

-; check: llilh %r2, 65535
-; nextln: br %r14
+; block0:
+; llilh %r2, 65535
+; br %r14

 function %f() -> i64 {
 block0:
@@ -52,8 +57,9 @@ block0:
 return v0
 }

-; check: llihl %r2, 65535
-; nextln: br %r14
+; block0:
+; llihl %r2, 65535
+; br %r14

 function %f() -> i64 {
 block0:
@@ -61,8 +67,9 @@ block0:
 return v0
 }

-; check: llihh %r2, 65535
-; nextln: br %r14
+; block0:
+; llihh %r2, 65535
+; br %r14

 function
%f() -> i64 { block0: @@ -70,8 +77,9 @@ block0: return v0 } -; check: lghi %r2, -1 -; nextln: br %r14 +; block0: +; lghi %r2, -1 +; br %r14 function %f() -> i64 { block0: @@ -79,28 +87,31 @@ block0: return v0 } -; check: lgfi %r2, -65536 -; nextln: br %r14 +; block0: +; lgfi %r2, -65536 +; br %r14 function %f() -> i64 { block0: - v0 = iconst.i64 0xf34bf0a31212003a ; random digits + v0 = iconst.i64 0xf34bf0a31212003a ;; random digits return v0 } -; check: llihf %r2, 4081840291 -; nextln: iilf %r2, 303169594 -; nextln: br %r14 +; block0: +; llihf %r2, 4081840291 +; iilf %r2, 303169594 +; br %r14 function %f() -> i64 { block0: - v0 = iconst.i64 0x12e900001ef40000 ; random digits with 2 clear half words + v0 = iconst.i64 0x12e900001ef40000 ;; random digits with 2 clear half words return v0 } -; check: llihh %r2, 4841 -; nextln: iilh %r2, 7924 -; nextln: br %r14 +; block0: +; llihh %r2, 4841 +; iilh %r2, 7924 +; br %r14 function %f() -> i32 { block0: @@ -108,6 +119,7 @@ block0: return v0 } -; check: lhi %r2, -1 -; nextln: br %r14 +; block0: +; lhi %r2, -1 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/conversions.clif b/cranelift/filetests/filetests/isa/s390x/conversions.clif index 5d57c8881a..aaf7c48a57 100644 --- a/cranelift/filetests/filetests/isa/s390x/conversions.clif +++ b/cranelift/filetests/filetests/isa/s390x/conversions.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,8 +11,9 @@ block0(v0: i32): return v1 } -; check: llgfr %r2, %r2 -; nextln: br %r14 +; block0: +; llgfr %r2, %r2 +; br %r14 function %uextend_i16_i64(i16) -> i64 { block0(v0: i16): @@ -20,8 +21,9 @@ block0(v0: i16): return v1 } -; check: llghr %r2, %r2 -; nextln: br %r14 +; block0: +; llghr %r2, %r2 +; br %r14 function %uextend_i16_i32(i16) -> i32 { block0(v0: i16): @@ -29,8 +31,9 @@ block0(v0: i16): return v1 } -; check: llhr %r2, %r2 -; nextln: br %r14 +; block0: +; llhr %r2, %r2 +; br %r14 function %uextend_i8_i64(i8) -> i64 { block0(v0: i8): @@ -38,8 +41,9 @@ block0(v0: i8): return v1 } -; check: llgcr %r2, %r2 -; nextln: br %r14 +; block0: +; llgcr %r2, %r2 +; br %r14 function %uextend_i8_i32(i8) -> i32 { block0(v0: i8): @@ -47,8 +51,9 @@ block0(v0: i8): return v1 } -; check: llcr %r2, %r2 -; nextln: br %r14 +; block0: +; llcr %r2, %r2 +; br %r14 function %uextend_i8_i16(i8) -> i16 { block0(v0: i8): @@ -56,13 +61,9 @@ block0(v0: i8): return v1 } -; check: llcr %r2, %r2 -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SEXTEND -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r2, %r2 +; br %r14 function %sextend_i32_i64(i32) -> i64 { block0(v0: i32): @@ -70,8 +71,9 @@ block0(v0: i32): return v1 } -; check: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lgfr %r2, %r2 +; br %r14 function %sextend_i16_i64(i16) -> i64 { block0(v0: i16): @@ -79,8 +81,9 @@ block0(v0: i16): return v1 } -; check: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; lghr %r2, %r2 +; br %r14 function %sextend_i16_i32(i16) -> i32 { block0(v0: i16): @@ -88,8 +91,9 @@ block0(v0: i16): return v1 } -; check: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; lhr %r2, %r2 +; br %r14 function %sextend_i8_i64(i8) -> i64 { block0(v0: i8): @@ -97,8 +101,9 @@ block0(v0: i8): return v1 } -; check: lgbr %r2, %r2 -; nextln: br %r14 +; block0: +; lgbr %r2, %r2 +; br %r14 function %sextend_i8_i32(i8) -> i32 { block0(v0: i8): @@ -106,8 +111,9 @@ block0(v0: i8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 +; block0: +; lbr %r2, %r2 
+; br %r14 function %sextend_i8_i16(i8) -> i16 { block0(v0: i8): @@ -115,13 +121,9 @@ block0(v0: i8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IREDUCE -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lbr %r2, %r2 +; br %r14 function %ireduce_i64_i32(i64, i64) -> i32 { block0(v0: i64, v1: i64): @@ -129,8 +131,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i64_i16(i64, i64) -> i16 { block0(v0: i64, v1: i64): @@ -138,8 +141,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i64_i8(i64, i64) -> i8 { block0(v0: i64, v1: i64): @@ -147,8 +151,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i32_i16(i32, i32) -> i16 { block0(v0: i32, v1: i32): @@ -156,8 +161,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i32_i8(i32, i32) -> i8 { block0(v0: i32, v1: i32): @@ -165,8 +171,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i16_i8(i16, i16) -> i8 { block0(v0: i16, v1: i16): @@ -174,12 +181,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BEXTEND -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r2, %r3 +; br %r14 function %bextend_b32_b64(b32) -> b64 { block0(v0: b32): @@ -187,8 +191,9 @@ block0(v0: b32): return v1 } -; check: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lgfr %r2, %r2 +; br %r14 function %bextend_b16_b64(b16) -> b64 { block0(v0: b16): @@ -196,8 +201,9 @@ block0(v0: b16): return v1 } -; check: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; lghr %r2, %r2 +; br %r14 function %bextend_b16_b32(b16) -> b32 { block0(v0: b16): @@ -205,8 +211,9 @@ block0(v0: b16): return v1 } -; check: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; lhr %r2, %r2 +; br %r14 function %bextend_b8_b64(b8) -> b64 { block0(v0: b8): @@ -214,8 +221,9 @@ block0(v0: b8): return v1 } -; check: lgbr %r2, %r2 -; nextln: br %r14 +; block0: +; lgbr %r2, %r2 +; br %r14 function %bextend_b8_b32(b8) -> b32 { block0(v0: b8): @@ -223,8 +231,9 @@ block0(v0: b8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 +; block0: +; lbr %r2, %r2 +; br %r14 function %bextend_b8_b16(b8) -> b16 { block0(v0: b8): @@ -232,8 +241,9 @@ block0(v0: b8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 +; block0: +; lbr %r2, %r2 +; br %r14 function %bextend_b1_b64(b1) -> b64 { block0(v0: b1): @@ -241,9 +251,10 @@ block0(v0: b1): return v1 } -; check: sllg %r2, %r2, 63 -; nextln: srag %r2, %r2, 63 -; nextln: br %r14 +; block0: +; sllg %r5, %r2, 63 +; srag %r2, %r5, 63 +; br %r14 function %bextend_b1_b32(b1) -> b32 { block0(v0: b1): @@ -251,9 +262,10 @@ block0(v0: b1): return v1 } -; check: sllk %r2, %r2, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r5, %r2, 31 +; srak %r2, %r5, 31 +; br %r14 function %bextend_b1_b16(b1) -> b16 { block0(v0: b1): @@ -261,9 +273,10 @@ block0(v0: b1): return v1 } -; check: sllk %r2, %r2, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r5, %r2, 31 +; srak %r2, %r5, 31 +; br %r14 function %bextend_b1_b8(b1) -> b8 { block0(v0: b1): @@ -271,13 
+284,10 @@ block0(v0: b1): return v1 } -; check: sllk %r2, %r2, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BREDUCE -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r2, 31 +; srak %r2, %r5, 31 +; br %r14 function %breduce_b64_b32(b64, b64) -> b32 { block0(v0: b64, v1: b64): @@ -285,8 +295,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b64_b16(b64, b64) -> b16 { block0(v0: b64, v1: b64): @@ -294,8 +305,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b64_b8(b64, b64) -> b8 { block0(v0: b64, v1: b64): @@ -303,8 +315,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b64_b1(b64, b64) -> b1 { block0(v0: b64, v1: b64): @@ -312,8 +325,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b32_b16(b32, b32) -> b16 { block0(v0: b32, v1: b32): @@ -321,8 +335,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b32_b8(b32, b32) -> b8 { block0(v0: b32, v1: b32): @@ -330,8 +345,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b32_b1(b32, b32) -> b1 { block0(v0: b32, v1: b32): @@ -339,8 +355,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b16_b8(b16, b16) -> b8 { block0(v0: b16, v1: b16): @@ -348,8 +365,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b16_b1(b16, b16) -> b1 { block0(v0: b16, v1: b16): @@ -357,8 +375,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b8_b1(b8, b8) -> b1 { block0(v0: b8, v1: b8): @@ -366,12 +385,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BMASK -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i64(b64, b64) -> i64 { block0(v0: b64, v1: b64): @@ -379,8 +395,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lgr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i32(b64, b64) -> i32 { block0(v0: b64, v1: b64): @@ -388,8 +405,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i16(b64, b64) -> i16 { block0(v0: b64, v1: b64): @@ -397,8 +415,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i8(b64, b64) -> i8 { block0(v0: b64, v1: b64): @@ -406,8 +425,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b32_i64(b32, b32) -> i64 { block0(v0: b32, v1: b32): @@ -415,8 +435,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lgfr %r2, %r3 -; nextln: br %r14 +; block0: +; lgfr %r2, %r3 +; br %r14 function %bmask_b32_i32(b32, b32) -> i32 { block0(v0: b32, v1: b32): @@ 
-424,8 +445,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b32_i16(b32, b32) -> i16 { block0(v0: b32, v1: b32): @@ -433,8 +455,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b32_i8(b32, b32) -> i8 { block0(v0: b32, v1: b32): @@ -442,8 +465,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b16_i64(b16, b16) -> i64 { block0(v0: b16, v1: b16): @@ -451,8 +475,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lghr %r2, %r3 -; nextln: br %r14 +; block0: +; lghr %r2, %r3 +; br %r14 function %bmask_b16_i32(b16, b16) -> i32 { block0(v0: b16, v1: b16): @@ -460,8 +485,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lhr %r2, %r3 -; nextln: br %r14 +; block0: +; lhr %r2, %r3 +; br %r14 function %bmask_b16_i16(b16, b16) -> i16 { block0(v0: b16, v1: b16): @@ -469,8 +495,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b16_i8(b16, b16) -> i8 { block0(v0: b16, v1: b16): @@ -478,8 +505,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b8_i64(b8, b8) -> i64 { block0(v0: b8, v1: b8): @@ -487,8 +515,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lgbr %r2, %r3 -; nextln: br %r14 +; block0: +; lgbr %r2, %r3 +; br %r14 function %bmask_b8_i32(b8, b8) -> i32 { block0(v0: b8, v1: b8): @@ -496,8 +525,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lbr %r2, %r3 -; nextln: br %r14 +; block0: +; lbr %r2, %r3 +; br %r14 function %bmask_b8_i16(b8, b8) -> i16 { block0(v0: b8, v1: b8): @@ -505,8 +535,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lbr %r2, %r3 -; nextln: br %r14 +; block0: +; lbr %r2, %r3 +; br %r14 function %bmask_b8_i8(b8, b8) -> i8 { block0(v0: b8, v1: b8): @@ -514,8 +545,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b1_i64(b1, b1) -> i64 { block0(v0: b1, v1: b1): @@ -523,9 +555,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllg %r2, %r3, 63 -; nextln: srag %r2, %r2, 63 -; nextln: br %r14 +; block0: +; sllg %r3, %r3, 63 +; srag %r2, %r3, 63 +; br %r14 function %bmask_b1_i32(b1, b1) -> i32 { block0(v0: b1, v1: b1): @@ -533,9 +566,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllk %r2, %r3, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r3, %r3, 31 +; srak %r2, %r3, 31 +; br %r14 function %bmask_b1_i16(b1, b1) -> i16 { block0(v0: b1, v1: b1): @@ -543,9 +577,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllk %r2, %r3, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r3, %r3, 31 +; srak %r2, %r3, 31 +; br %r14 function %bmask_b1_i8(b1, b1) -> i8 { block0(v0: b1, v1: b1): @@ -553,13 +588,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllk %r2, %r3, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BINT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r3, %r3, 31 +; srak %r2, %r3, 31 +; br %r14 function %bint_b64_i64(b64) -> i64 { block0(v0: b64): @@ -567,9 +599,10 @@ block0(v0: b64): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function 
%bint_b64_i32(b64) -> i32 { block0(v0: b64): @@ -577,8 +610,9 @@ block0(v0: b64): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b64_i16(b64) -> i16 { block0(v0: b64): @@ -586,8 +620,9 @@ block0(v0: b64): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b64_i8(b64) -> i8 { block0(v0: b64): @@ -595,8 +630,9 @@ block0(v0: b64): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b32_i64(b32) -> i64 { block0(v0: b32): @@ -604,9 +640,10 @@ block0(v0: b32): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b32_i32(b32) -> i32 { block0(v0: b32): @@ -614,8 +651,9 @@ block0(v0: b32): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b32_i16(b32) -> i16 { block0(v0: b32): @@ -623,8 +661,9 @@ block0(v0: b32): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b32_i8(b32) -> i8 { block0(v0: b32): @@ -632,8 +671,9 @@ block0(v0: b32): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b16_i64(b16) -> i64 { block0(v0: b16): @@ -641,9 +681,10 @@ block0(v0: b16): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b16_i32(b16) -> i32 { block0(v0: b16): @@ -651,8 +692,9 @@ block0(v0: b16): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b16_i16(b16) -> i16 { block0(v0: b16): @@ -660,8 +702,9 @@ block0(v0: b16): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b16_i8(b16) -> i8 { block0(v0: b16): @@ -669,8 +712,9 @@ block0(v0: b16): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b8_i64(b8) -> i64 { block0(v0: b8): @@ -678,9 +722,10 @@ block0(v0: b8): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b8_i32(b8) -> i32 { block0(v0: b8): @@ -688,8 +733,9 @@ block0(v0: b8): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b8_i16(b8) -> i16 { block0(v0: b8): @@ -697,8 +743,9 @@ block0(v0: b8): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b8_i8(b8) -> i8 { block0(v0: b8): @@ -706,8 +753,9 @@ block0(v0: b8): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b1_i64(b1) -> i64 { block0(v0: b1): @@ -715,9 +763,10 @@ block0(v0: b1): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b1_i32(b1) -> i32 { block0(v0: b1): @@ -725,8 +774,9 @@ block0(v0: b1): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b1_i16(b1) -> i16 { block0(v0: b1): @@ -734,8 +784,9 @@ block0(v0: b1): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b1_i8(b1) -> i8 { block0(v0: b1): @@ -743,6 +794,7 @@ block0(v0: b1): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 
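The s390x filetest hunks in this patch all follow one pattern: the header changes from `test compile` to `test compile precise-output`, the hand-written `check:`/`nextln:` directives are deleted, and the exact VCode disassembly the compiler now produces is recorded as `;`-prefixed comments after each function, grouped under `block0:`, `block1:`, ... labels. A minimal sketch of the new format, reusing the `uextend` case from the conversions.clif hunk above — the instruction sequence is taken from this patch itself, and the exact register choices are simply whatever regalloc2 assigns, so they may change with future allocator updates:

test compile precise-output
target s390x

function %uextend_i32_i64(i32) -> i64 {
block0(v0: i32):
    v1 = uextend.i64 v0
    return v1
}

; block0:
; llgfr %r2, %r2
; br %r14

Because precise output is compared exactly rather than pattern-matched, incidental codegen differences under regalloc2 show up throughout as test updates: `lgr` where the old allocator produced `lr`, fresh temporaries such as `%r5` in place of reused argument registers, and three-operand forms like `xgrk`/`ngrk` when the destination no longer coincides with a source.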
diff --git a/cranelift/filetests/filetests/isa/s390x/div-traps.clif b/cranelift/filetests/filetests/isa/s390x/div-traps.clif index 2d7428b50d..76aaba8d67 100644 --- a/cranelift/filetests/filetests/isa/s390x/div-traps.clif +++ b/cranelift/filetests/filetests/isa/s390x/div-traps.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set avoid_div_traps=1 target s390x @@ -12,16 +12,17 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: llihf %r2, 2147483647 -; nextln: iilf %r2, 4294967295 -; nextln: xgr %r2, %r1 -; nextln: ngr %r2, %r3 -; nextln: cgite %r2, -1 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; cgite %r3, 0 +; llihf %r5, 2147483647 +; iilf %r5, 4294967295 +; xgrk %r4, %r5, %r1 +; ngrk %r2, %r4, %r3 +; cgite %r2, -1 +; dsgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -30,11 +31,12 @@ block0(v0: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dsgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; lghi %r2, 2 +; dsgr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -42,15 +44,16 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: iilf %r2, 2147483647 -; nextln: xr %r2, %r1 -; nextln: nr %r2, %r3 -; nextln: cite %r2, -1 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; cite %r3, 0 +; iilf %r5, 2147483647 +; xrk %r4, %r5, %r1 +; nrk %r5, %r4, %r3 +; cite %r5, -1 +; dsgfr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -59,11 +62,12 @@ block0(v0: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -71,16 +75,17 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: lhi %r3, 32767 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; cite %r4, 0 +; lhi %r3, 32767 +; xrk %r5, %r3, %r1 +; nrk %r3, %r5, %r4 +; cite %r3, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -89,11 +94,12 @@ block0(v0: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -101,16 +107,17 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: lhi %r3, 127 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; cite %r4, 0 +; lhi %r3, 127 +; xrk %r5, %r3, %r1 +; nrk %r3, %r5, %r4 +; cite %r3, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -119,15 +126,12 @@ block0(v0: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr 
%r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %udiv_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -135,12 +139,13 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; cgite %r3, 0 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -149,12 +154,13 @@ block0(v0: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dlgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; lghi %r3, 2 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -162,12 +168,13 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; cite %r3, 0 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -176,12 +183,13 @@ block0(v0: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -189,13 +197,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -204,12 +213,13 @@ block0(v0: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -217,13 +227,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -232,16 +243,13 @@ block0(v0: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %srem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -249,13 +257,14 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: cghi %r3, -1 -; nextln: locghie %r1, 0 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; cgite %r3, 0 +; cghi %r3, -1 +; locghie %r1, 0 +; dsgr 
%r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -263,11 +272,12 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; cite %r3, 0 +; dsgfr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -275,12 +285,13 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; cite %r4, 0 +; dsgfr %r0, %r4 +; lgr %r2, %r0 +; br %r14 function %srem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -288,16 +299,13 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; cite %r4, 0 +; dsgfr %r0, %r4 +; lgr %r2, %r0 +; br %r14 function %urem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -305,12 +313,13 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; cgite %r3, 0 +; dlgr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -318,12 +327,13 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; cite %r3, 0 +; dlr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -331,13 +341,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 function %urem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -345,11 +356,12 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/fence.clif b/cranelift/filetests/filetests/isa/s390x/fence.clif index 9e97a9f12c..2439ec7a2e 100644 --- a/cranelift/filetests/filetests/isa/s390x/fence.clif +++ b/cranelift/filetests/filetests/isa/s390x/fence.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,7 +11,7 @@ block0: return } -; check: bcr 14, 0 -; nextln: br %r14 - +; block0: +; bcr 14, 0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point.clif b/cranelift/filetests/filetests/isa/s390x/floating-point.clif index 6f95c82487..11a5292c8d 100644 --- a/cranelift/filetests/filetests/isa/s390x/floating-point.clif +++ b/cranelift/filetests/filetests/isa/s390x/floating-point.clif @@ -1,4 +1,4 @@ -test compile +test compile 
precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -14,8 +14,9 @@ block0: return v1 } -; check: bras %r1, 8 ; data.f32 0 ; le %f0, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 8 ; data.f32 0 ; le %f0, 0(%r1) +; br %r14 function %f64const_zero() -> f64 { block0: @@ -23,8 +24,9 @@ block0: return v1 } -; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; br %r14 function %f32const_one() -> f32 { block0: @@ -32,8 +34,9 @@ block0: return v1 } -; check: bras %r1, 8 ; data.f32 1 ; le %f0, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 8 ; data.f32 1 ; le %f0, 0(%r1) +; br %r14 function %f64const_one() -> f64 { block0: @@ -41,12 +44,9 @@ block0: return v1 } -; check: bras %r1, 12 ; data.f64 1 ; ld %f0, 0(%r1) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FADD -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; bras %r1, 12 ; data.f64 1 ; ld %f0, 0(%r1) +; br %r14 function %fadd_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -54,8 +54,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: aebr %f0, %f2 -; nextln: br %r14 +; block0: +; aebr %f0, %f2 +; br %r14 function %fadd_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -63,12 +64,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: adbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FSUB -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; adbr %f0, %f2 +; br %r14 function %fsub_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -76,8 +74,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: sebr %f0, %f2 -; nextln: br %r14 +; block0: +; sebr %f0, %f2 +; br %r14 function %fsub_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -85,12 +84,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: sdbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMUL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sdbr %f0, %f2 +; br %r14 function %fmul_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -98,8 +94,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: meebr %f0, %f2 -; nextln: br %r14 +; block0: +; meebr %f0, %f2 +; br %r14 function %fmul_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -107,12 +104,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: mdbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; mdbr %f0, %f2 +; br %r14 function %fdiv_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -120,8 +114,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: debr %f0, %f2 -; nextln: br %r14 +; block0: +; debr %f0, %f2 +; br %r14 function %fdiv_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -129,12 +124,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: ddbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMIN -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; ddbr %f0, %f2 +; br %r14 function %fmin_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -142,8 +134,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: wfminsb %f0, %f0, %f2, 1 -; nextln: br %r14 +; block0: +; wfminsb %f0, %f0, %f2, 1 +; br %r14 function %fmin_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -151,12 +144,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: wfmindb %f0, %f0, %f2, 1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMAX -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; 
block0: +; wfmindb %f0, %f0, %f2, 1 +; br %r14 function %fmax_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -164,8 +154,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: wfmaxsb %f0, %f0, %f2, 1 -; nextln: br %r14 +; block0: +; wfmaxsb %f0, %f0, %f2, 1 +; br %r14 function %fmax_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -173,12 +164,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: wfmaxdb %f0, %f0, %f2, 1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SQRT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; wfmaxdb %f0, %f0, %f2, 1 +; br %r14 function %sqrt_f32(f32) -> f32 { block0(v0: f32): @@ -186,8 +174,9 @@ block0(v0: f32): return v1 } -; check: sqebr %f0, %f0 -; nextln: br %r14 +; block0: +; sqebr %f0, %f0 +; br %r14 function %sqrt_f64(f64) -> f64 { block0(v0: f64): @@ -195,12 +184,9 @@ block0(v0: f64): return v1 } -; check: sqdbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FABS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sqdbr %f0, %f0 +; br %r14 function %fabs_f32(f32) -> f32 { block0(v0: f32): @@ -208,8 +194,9 @@ block0(v0: f32): return v1 } -; check: lpebr %f0, %f0 -; nextln: br %r14 +; block0: +; lpebr %f0, %f0 +; br %r14 function %fabs_f64(f64) -> f64 { block0(v0: f64): @@ -217,12 +204,9 @@ block0(v0: f64): return v1 } -; check: lpdbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FNEG -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lpdbr %f0, %f0 +; br %r14 function %fneg_f32(f32) -> f32 { block0(v0: f32): @@ -230,8 +214,9 @@ block0(v0: f32): return v1 } -; check: lcebr %f0, %f0 -; nextln: br %r14 +; block0: +; lcebr %f0, %f0 +; br %r14 function %fneg_f64(f64) -> f64 { block0(v0: f64): @@ -239,12 +224,9 @@ block0(v0: f64): return v1 } -; check: lcdbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FPROMOTE/FDEMOTE -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lcdbr %f0, %f0 +; br %r14 function %fpromote_f32(f32) -> f64 { block0(v0: f32): @@ -252,8 +234,9 @@ block0(v0: f32): return v1 } -; check: ldebr %f0, %f0 -; nextln: br %r14 +; block0: +; ldebr %f0, %f0 +; br %r14 function %fdemote_f64(f64) -> f32 { block0(v0: f64): @@ -261,12 +244,9 @@ block0(v0: f64): return v1 } -; check: ledbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; CEIL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; ledbr %f0, %f0 +; br %r14 function %ceil_f32(f32) -> f32 { block0(v0: f32): @@ -274,8 +254,9 @@ block0(v0: f32): return v1 } -; check: fiebr %f0, %f0, 6 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 6 +; br %r14 function %ceil_f64(f64) -> f64 { block0(v0: f64): @@ -283,12 +264,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 6 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FLOOR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 6 +; br %r14 function %floor_f32(f32) -> f32 { block0(v0: f32): @@ -296,8 +274,9 @@ block0(v0: f32): return v1 } -; check: fiebr %f0, %f0, 7 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 7 +; br %r14 function %floor_f64(f64) -> f64 { block0(v0: f64): @@ -305,12 +284,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 7 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TRUNC -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 7 +; br %r14 function %trunc_f32(f32) -> f32 { block0(v0: f32): @@ -318,8 +294,9 @@ block0(v0: f32): return v1 
} -; check: fiebr %f0, %f0, 5 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 5 +; br %r14 function %trunc_f64(f64) -> f64 { block0(v0: f64): @@ -327,12 +304,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 5 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; NEAREST -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 5 +; br %r14 function %nearest_f32(f32) -> f32 { block0(v0: f32): @@ -340,8 +314,9 @@ block0(v0: f32): return v1 } -; check: fiebr %f0, %f0, 4 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 4 +; br %r14 function %nearest_f64(f64) -> f64 { block0(v0: f64): @@ -349,12 +324,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 4 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMA -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 4 +; br %r14 function %fma_f32(f32, f32, f32) -> f32 { block0(v0: f32, v1: f32, v2: f32): @@ -362,11 +334,11 @@ block0(v0: f32, v1: f32, v2: f32): return v3 } -; FIXME: regalloc - -; check: maebr %f4, %f0, %f2 -; nextln: ler %f0, %f4 -; nextln: br %r14 +; block0: +; ldr %f1, %f0 +; ldr %f0, %f4 +; maebr %f0, %f1, %f2 +; br %r14 function %fma_f64(f64, f64, f64) -> f64 { block0(v0: f64, v1: f64, v2: f64): @@ -374,13 +346,11 @@ block0(v0: f64, v1: f64, v2: f64): return v3 } -; check: madbr %f4, %f0, %f2 -; nextln: ldr %f0, %f4 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCOPYSIGN -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; ldr %f1, %f0 +; ldr %f0, %f4 +; madbr %f0, %f1, %f2 +; br %r14 function %fcopysign_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -388,8 +358,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: cpsdr %f0, %f2, %f0 -; nextln: br %r14 +; block0: +; cpsdr %f0, %f2, %f0 +; br %r14 function %fcopysign_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -397,12 +368,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: cpsdr %f0, %f2, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCVT_TO_UINT/FCVT_TO_SINT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cpsdr %f0, %f2, %f0 +; br %r14 function %fcvt_to_uint_f32_i32(f32) -> i32 { block0(v0: f32): @@ -410,11 +378,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clfebr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; clfebr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f32_i32(f32) -> i32 { block0(v0: f32): @@ -422,11 +391,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cfebra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; cfebra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_uint_f32_i64(f32) -> i64 { block0(v0: f32): @@ -434,11 +404,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clgebr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; clgebr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f32_i64(f32) -> i64 { block0(v0: f32): @@ -446,11 +417,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cgebra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; cgebra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_uint_f64_i32(f64) -> i32 { block0(v0: f64): @@ 
-458,11 +430,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clfdbr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; clfdbr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f64_i32(f64) -> i32 { block0(v0: f64): @@ -470,11 +443,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cfdbra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; cfdbra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_uint_f64_i64(f64) -> i64 { block0(v0: f64): @@ -482,11 +456,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clgdbr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; clgdbr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f64_i64(f64) -> i64 { block0(v0: f64): @@ -494,15 +469,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cgdbra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCVT_FROM_UINT/FCVT_FROM_SINT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; cgdbra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_from_uint_i32_f32(i32) -> f32 { block0(v0: i32): @@ -510,8 +482,9 @@ block0(v0: i32): return v1 } -; check: celfbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; celfbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i32_f32(i32) -> f32 { block0(v0: i32): @@ -519,8 +492,9 @@ block0(v0: i32): return v1 } -; check: cefbra %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cefbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_uint_i64_f32(i64) -> f32 { block0(v0: i64): @@ -528,8 +502,9 @@ block0(v0: i64): return v1 } -; check: celgbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; celgbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i64_f32(i64) -> f32 { block0(v0: i64): @@ -537,8 +512,9 @@ block0(v0: i64): return v1 } -; check: cegbra %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cegbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_uint_i32_f64(i32) -> f64 { block0(v0: i32): @@ -546,8 +522,9 @@ block0(v0: i32): return v1 } -; check: cdlfbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cdlfbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i32_f64(i32) -> f64 { block0(v0: i32): @@ -555,8 +532,9 @@ block0(v0: i32): return v1 } -; check: cdfbra %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cdfbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_uint_i64_f64(i64) -> f64 { block0(v0: i64): @@ -564,8 +542,9 @@ block0(v0: i64): return v1 } -; check: cdlgbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cdlgbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i64_f64(i64) -> f64 { block0(v0: i64): @@ -573,12 +552,9 @@ block0(v0: i64): return v1 } -; check: cdgbra %f0, 0, %r2, 0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCVT_TO_UINT_SAT/FCVT_TO_SINT_SAT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cdgbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f32_i32(f32) -> i32 { block0(v0: f32): @@ -586,10 +562,11 @@ block0(v0: f32): return v1 } -; check: clfebr %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; clfebr %r2, 5, %f0, 0 +; cebr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f32_i32(f32) -> 
i32 { block0(v0: f32): @@ -597,10 +574,11 @@ block0(v0: f32): return v1 } -; check: cfebra %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; cfebra %r2, 5, %f0, 0 +; cebr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f32_i64(f32) -> i64 { block0(v0: f32): @@ -608,10 +586,11 @@ block0(v0: f32): return v1 } -; check: clgebr %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 +; block0: +; clgebr %r2, 5, %f0, 0 +; cebr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f32_i64(f32) -> i64 { block0(v0: f32): @@ -619,10 +598,11 @@ block0(v0: f32): return v1 } -; check: cgebra %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 +; block0: +; cgebra %r2, 5, %f0, 0 +; cebr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f64_i32(f64) -> i32 { block0(v0: f64): @@ -630,10 +610,11 @@ block0(v0: f64): return v1 } -; check: clfdbr %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; clfdbr %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f64_i32(f64) -> i32 { block0(v0: f64): @@ -641,10 +622,11 @@ block0(v0: f64): return v1 } -; check: cfdbra %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; cfdbra %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { block0(v0: f64): @@ -652,10 +634,11 @@ block0(v0: f64): return v1 } -; check: clgdbr %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 +; block0: +; clgdbr %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f64_i64(f64) -> i64 { block0(v0: f64): @@ -663,14 +646,11 @@ block0(v0: f64): return v1 } -; check: cgdbra %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BITCAST -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cgdbra %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %bitcast_i64_f64(i64) -> f64 { block0(v0: i64): @@ -678,8 +658,9 @@ block0(v0: i64): return v1 } -; check: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; ldgr %f0, %r2 +; br %r14 function %bitcast_f64_i64(f64) -> i64 { block0(v0: f64): @@ -687,8 +668,9 @@ block0(v0: f64): return v1 } -; check: lgdr %r2, %f0 -; nextln: br %r14 +; block0: +; lgdr %r2, %f0 +; br %r14 function %bitcast_i32_f32(i32) -> f32 { block0(v0: i32): @@ -696,9 +678,10 @@ block0(v0: i32): return v1 } -; check: sllg %r2, %r2, 32 -; nextln: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; sllg %r5, %r2, 32 +; ldgr %f0, %r5 +; br %r14 function %bitcast_f32_i32(f32) -> i32 { block0(v0: f32): @@ -706,6 +689,8 @@ block0(v0: f32): return v1 } -; check: lgdr %r2, %f0 -; nextln: srlg %r2, %r2, 32 -; nextln: br %r14 +; block0: +; lgdr %r5, %f0 +; srlg %r2, %r5, 32 +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif b/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif index 8bad0ca09a..76224768bc 100644 --- a/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif +++ b/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x arch13 function %load_f64_little(i64) -> f64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: vlebrg %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vlebrg %f0, 
0(%r2), 0 +; br %r14 function %load_f32_little(i64) -> f32 { block0(v0: i64): @@ -16,8 +17,9 @@ block0(v0: i64): return v1 } -; check: vlebrf %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vlebrf %f0, 0(%r2), 0 +; br %r14 function %store_f64_little(f64, i64) { block0(v0: f64, v1: i64): @@ -25,8 +27,9 @@ block0(v0: f64, v1: i64): return } -; check: vstebrg %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vstebrg %f0, 0(%r2), 0 +; br %r14 function %store_f32_little(f32, i64) { block0(v0: f32, v1: i64): @@ -34,6 +37,7 @@ block0(v0: f32, v1: i64): return } -; check: vstebrf %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vstebrf %f0, 0(%r2), 0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/fpmem.clif b/cranelift/filetests/filetests/isa/s390x/fpmem.clif index ddf37f84f2..49b1ff1aeb 100644 --- a/cranelift/filetests/filetests/isa/s390x/fpmem.clif +++ b/cranelift/filetests/filetests/isa/s390x/fpmem.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %load_f64(i64) -> f64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: ld %f0, 0(%r2) -; nextln: br %r14 +; block0: +; ld %f0, 0(%r2) +; br %r14 function %load_f32(i64) -> f32 { block0(v0: i64): @@ -16,8 +17,9 @@ block0(v0: i64): return v1 } -; check: le %f0, 0(%r2) -; nextln: br %r14 +; block0: +; le %f0, 0(%r2) +; br %r14 function %load_f64_little(i64) -> f64 { block0(v0: i64): @@ -25,9 +27,10 @@ block0(v0: i64): return v1 } -; check: lrvg %r2, 0(%r2) -; nextln: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; lrvg %r5, 0(%r2) +; ldgr %f0, %r5 +; br %r14 function %load_f32_little(i64) -> f32 { block0(v0: i64): @@ -35,10 +38,11 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: sllg %r2, %r2, 32 -; nextln: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; lrv %r5, 0(%r2) +; sllg %r3, %r5, 32 +; ldgr %f0, %r3 +; br %r14 function %store_f64(f64, i64) { block0(v0: f64, v1: i64): @@ -46,8 +50,9 @@ block0(v0: f64, v1: i64): return } -; check: std %f0, 0(%r2) -; nextln: br %r14 +; block0: +; std %f0, 0(%r2) +; br %r14 function %store_f32(f32, i64) { block0(v0: f32, v1: i64): @@ -55,8 +60,9 @@ block0(v0: f32, v1: i64): return } -; check: ste %f0, 0(%r2) -; nextln: br %r14 +; block0: +; ste %f0, 0(%r2) +; br %r14 function %store_f64_little(f64, i64) { block0(v0: f64, v1: i64): @@ -64,9 +70,10 @@ block0(v0: f64, v1: i64): return } -; check: lgdr %r3, %f0 -; nextln: strvg %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lgdr %r3, %f0 +; strvg %r3, 0(%r2) +; br %r14 function %store_f32_little(f32, i64) { block0(v0: f32, v1: i64): @@ -74,8 +81,9 @@ block0(v0: f32, v1: i64): return } -; check: lgdr %r3, %f0 -; nextln: srlg %r3, %r3, 32 -; nextln: strv %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lgdr %r3, %f0 +; srlg %r4, %r3, 32 +; strv %r4, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif index c07d5f66a7..7793f4df75 100644 --- a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %dynamic_heap_check(i64 vmctx, i32) -> i64 { @@ -11,20 +11,20 @@ block0(v0: i64, v1: i32): return v2 } -; check: Block 0: -; check: llgfr %r3, %r3 -; nextln: lghi %r4, 0 -; nextln: ag %r4, 0(%r2) -; nextln: clgr %r3, %r4 -; nextln: jgnh label1 ; jg label2 -; check: Block 1: -; check: agr %r2, %r3 -; nextln: lghi %r5, 0 -; nextln: clgr %r3, %r4 -; nextln: locgrh %r2, %r5 -; nextln: br 
%r14 -; check: Block 2: -; check: trap +; block0: +; llgfr %r4, %r3 +; lghi %r3, 0 +; ag %r3, 0(%r2) +; clgr %r4, %r3 +; jgnh label1 ; jg label2 +; block1: +; agr %r2, %r4 +; lghi %r5, 0 +; clgr %r4, %r3 +; locgrh %r2, %r5 +; br %r14 +; block2: +; trap function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -35,15 +35,16 @@ block0(v0: i64, v1: i32): return v2 } -; check: Block 0: -; check: llgfr %r3, %r3 -; nextln: clgfi %r3, 65536 -; nextln: jgnh label1 ; jg label2 -; check: Block 1: -; check: agr %r2, %r3 -; nextln: lghi %r4, 0 -; nextln: clgfi %r3, 65536 -; nextln: locgrh %r2, %r4 -; nextln: br %r14 -; check: Block 2: -; check: trap +; block0: +; llgfr %r3, %r3 +; clgfi %r3, 65536 +; jgnh label1 ; jg label2 +; block1: +; agr %r2, %r3 +; lghi %r4, 0 +; clgfi %r3, 65536 +; locgrh %r2, %r4 +; br %r14 +; block2: +; trap + diff --git a/cranelift/filetests/filetests/isa/s390x/icmp.clif b/cranelift/filetests/filetests/isa/s390x/icmp.clif index 50c2120ac7..6d1c2b0ce1 100644 --- a/cranelift/filetests/filetests/isa/s390x/icmp.clif +++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %icmp_slt_i64(i64, i64) -> b1 { @@ -7,10 +7,11 @@ block0(v0: i64, v1: i64): return v2 } -; check: cgr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_ext32(i64, i32) -> b1 { block0(v0: i64, v1: i32): @@ -19,10 +20,11 @@ block0(v0: i64, v1: i32): return v3 } -; check: cgfr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgfr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_imm16(i64) -> b1 { block0(v0: i64): @@ -31,10 +33,11 @@ block0(v0: i64): return v2 } -; check: cghi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cghi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_imm32(i64) -> b1 { block0(v0: i64): @@ -43,10 +46,11 @@ block0(v0: i64): return v2 } -; check: cgfi %r2, 32768 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgfi %r2, 32768 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_mem(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -55,10 +59,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: cg %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cg %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym @@ -69,10 +74,11 @@ block0(v0: i64): return v3 } -; check: cgrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_mem_ext16(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -81,10 +87,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: cgh %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgh %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym @@ -95,10 +102,11 @@ block0(v0: i64): return v3 } -; check: cghrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cghrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_mem_ext32(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -107,10 +115,11 @@ block0(v0: i64, v1: 
i64): return v3 } -; check: cgf %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgf %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym @@ -121,10 +130,11 @@ block0(v0: i64): return v3 } -; check: cgfrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgfrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32(i32, i32) -> b1 { block0(v0: i32, v1: i32): @@ -132,10 +142,11 @@ block0(v0: i32, v1: i32): return v2 } -; check: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_imm16(i32) -> b1 { block0(v0: i32): @@ -144,10 +155,11 @@ block0(v0: i32): return v2 } -; check: chi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; chi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_imm(i32) -> b1 { block0(v0: i32): @@ -156,10 +168,11 @@ block0(v0: i32): return v2 } -; check: cfi %r2, 32768 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cfi %r2, 32768 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_mem(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -168,10 +181,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: c %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; c %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_memoff(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -180,10 +194,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: cy %r2, 4096(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cy %r2, 4096(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym @@ -194,10 +209,11 @@ block0(v0: i32): return v3 } -; check: crl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; crl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_mem_ext16(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -206,10 +222,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: ch %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; ch %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_memoff_ext16(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -218,10 +235,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: chy %r2, 4096(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; chy %r2, 4096(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym @@ -232,10 +250,11 @@ block0(v0: i32): return v3 } -; check: chrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; chrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16(i16, i16) -> b1 { block0(v0: i16, v1: i16): @@ -243,12 +262,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: lhr %r3, %r3 -; nextln: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; lhr %r3, %r2 +; lhr %r5, %r5 +; cr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16_imm(i16) -> b1 { block0(v0: i16): @@ -257,11 +278,12 @@ block0(v0: i16): 
return v2 } -; check: lhr %r2, %r2 -; nextln: chi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; chi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16_mem(i16, i64) -> b1 { block0(v0: i16, v1: i64): @@ -270,11 +292,12 @@ block0(v0: i16, v1: i64): return v3 } -; check: lhr %r2, %r2 -; nextln: ch %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lhr %r4, %r2 +; ch %r4, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16_sym(i16) -> b1 { gv0 = symbol colocated %sym @@ -285,11 +308,12 @@ block0(v0: i16): return v3 } -; check: lhr %r2, %r2 -; nextln: chrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; chrl %r5, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): @@ -297,12 +321,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: lbr %r3, %r3 -; nextln: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; lbr %r3, %r2 +; lbr %r5, %r5 +; cr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i8_imm(i8) -> b1 { block0(v0: i8): @@ -311,11 +337,12 @@ block0(v0: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: chi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lbr %r5, %r2 +; chi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i8_mem(i8, i64) -> b1 { block0(v0: i8, v1: i64): @@ -324,12 +351,14 @@ block0(v0: i8, v1: i64): return v3 } -; check: lbr %r2, %r2 -; nextln: lb %r3, 0(%r3) -; nextln: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; lbr %r3, %r2 +; lb %r5, 0(%r5) +; cr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -337,10 +366,11 @@ block0(v0: i64, v1: i64): return v2 } -; check: clgr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_ext32(i64, i32) -> b1 { block0(v0: i64, v1: i32): @@ -349,10 +379,11 @@ block0(v0: i64, v1: i32): return v3 } -; check: clgfr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgfr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_imm(i64) -> b1 { block0(v0: i64): @@ -361,10 +392,11 @@ block0(v0: i64): return v2 } -; check: clgfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgfi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_mem(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -373,10 +405,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: clg %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clg %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym @@ -387,10 +420,11 @@ block0(v0: i64): return v3 } -; check: clgrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_mem_ext32(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -399,10 +433,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: clgf %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: 
lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgf %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym @@ -413,10 +448,11 @@ block0(v0: i64): return v3 } -; check: clgfrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgfrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_mem_ext16(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -425,11 +461,12 @@ block0(v0: i64, v1: i64): return v3 } -; check: llgh %r3, 0(%r3) -; check: clgr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llgh %r4, 0(%r3) +; clgr %r2, %r4 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym @@ -440,10 +477,11 @@ block0(v0: i64): return v3 } -; check: clghrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clghrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32(i32, i32) -> b1 { block0(v0: i32, v1: i32): @@ -451,10 +489,11 @@ block0(v0: i32, v1: i32): return v2 } -; check: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_imm(i32) -> b1 { block0(v0: i32): @@ -463,10 +502,11 @@ block0(v0: i32): return v2 } -; check: clfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clfi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_mem(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -475,10 +515,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: cl %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cl %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_memoff(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -487,10 +528,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: cly %r2, 4096(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cly %r2, 4096(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym @@ -501,10 +543,11 @@ block0(v0: i32): return v3 } -; check: clrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_mem_ext16(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -513,11 +556,12 @@ block0(v0: i32, v1: i64): return v3 } -; check: llh %r3, 0(%r3) -; check: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llh %r4, 0(%r3) +; clr %r2, %r4 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym @@ -528,10 +572,11 @@ block0(v0: i32): return v3 } -; check: clhrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clhrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16(i16, i16) -> b1 { block0(v0: i16, v1: i16): @@ -539,12 +584,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: llhr %r3, %r3 -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llhr %r3, %r2 +; llhr %r5, %r5 +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16_imm(i16) -> b1 { 
block0(v0: i16): @@ -553,11 +600,12 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: clfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; clfi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16_mem(i16, i64) -> b1 { block0(v0: i16, v1: i64): @@ -566,12 +614,14 @@ block0(v0: i16, v1: i64): return v3 } -; check: llhr %r2, %r2 -; nextln: llh %r3, 0(%r3) -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llhr %r3, %r2 +; llh %r5, 0(%r5) +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16_mem(i16) -> b1 { gv0 = symbol colocated %sym @@ -582,11 +632,12 @@ block0(v0: i16): return v3 } -; check: llhr %r2, %r2 -; nextln: clhrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; clhrl %r5, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): @@ -594,12 +645,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: llcr %r3, %r3 -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llcr %r3, %r2 +; llcr %r5, %r5 +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i8_imm(i8) -> b1 { block0(v0: i8): @@ -608,11 +661,12 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: clfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llcr %r5, %r2 +; clfi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i8_mem(i8, i64) -> b1 { block0(v0: i8, v1: i64): @@ -621,10 +675,12 @@ block0(v0: i8, v1: i64): return v3 } -; check: llcr %r2, %r2 -; nextln: llc %r3, 0(%r3) -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llcr %r3, %r2 +; llc %r5, 0(%r5) +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/jumptable.clif b/cranelift/filetests/filetests/isa/s390x/jumptable.clif index 3517efcf53..69603266cf 100644 --- a/cranelift/filetests/filetests/isa/s390x/jumptable.clif +++ b/cranelift/filetests/filetests/isa/s390x/jumptable.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %f(i64) -> i64 { @@ -28,24 +28,32 @@ block5(v5: i64): return v6 } -; check: clgfi %r2, 3 -; nextln: jghe label1 -; nextln: sllg %r3, %r2, 2 -; nextln: larl %r1, 14 ; agf %r1, 0(%r1, %r3) ; br %r1 ; jt_entries label3 label5 label7 - -; check: Block 3 -; check: lghi %r3, 1 -; nextln: jg - -; check: Block 5 -; check: lghi %r3, 2 -; nextln: jg - -; check: Block 7 -; check: lghi %r3, 3 -; nextln: jg - -; check: agr %r2, %r3 -; nextln: br %r14 - +; block0: +; clgfi %r2, 3 +; jghe label1 +; sllg %r5, %r2, 2 +; larl %r1, 14 ; agf %r1, 0(%r1, %r5) ; br %r1 ; jt_entries label3 label5 label7 +; block1: +; lghi %r4, 4 +; jg label2 +; block2: +; jg label9 +; block3: +; lghi %r4, 1 +; jg label4 +; block4: +; jg label9 +; block5: +; lghi %r4, 2 +; jg label6 +; block6: +; jg label9 +; block7: +; lghi %r4, 3 +; jg label8 +; block8: +; jg label9 +; block9: +; agr %r2, %r4 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/load-little.clif b/cranelift/filetests/filetests/isa/s390x/load-little.clif index 6561863d73..876e929f77 100644 --- a/cranelift/filetests/filetests/isa/s390x/load-little.clif +++ 
b/cranelift/filetests/filetests/isa/s390x/load-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lrvg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvg %r2, 0(%r2) +; br %r14 function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) +; br %r14 function %uload8_i64(i64) -> i64 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: llgc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgc %r2, 0(%r2) +; br %r14 function %sload8_i64(i64) -> i64 { block0(v0: i64): @@ -36,8 +39,9 @@ block0(v0: i64): return v1 } -; check: lgb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgb %r2, 0(%r2) +; br %r14 function %uload16_i64(i64) -> i64 { block0(v0: i64): @@ -45,9 +49,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: llghr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; llghr %r2, %r5 +; br %r14 function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -57,9 +62,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: llghr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; llghr %r2, %r3 +; br %r14 function %sload16_i64(i64) -> i64 { block0(v0: i64): @@ -67,9 +73,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; lghr %r2, %r5 +; br %r14 function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -79,9 +86,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; lghr %r2, %r3 +; br %r14 function %uload32_i64(i64) -> i64 { block0(v0: i64): @@ -89,9 +97,10 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: llgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lrv %r5, 0(%r2) +; llgfr %r2, %r5 +; br %r14 function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -101,9 +110,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: llgfr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r3, 0(%r1) +; llgfr %r2, %r3 +; br %r14 function %sload32_i64(i64) -> i64 { block0(v0: i64): @@ -111,9 +121,10 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lrv %r5, 0(%r2) +; lgfr %r2, %r5 +; br %r14 function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -123,9 +134,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r3, 0(%r1) +; lgfr %r2, %r3 +; br %r14 function %load_i32(i64) -> i32 { block0(v0: i64): @@ -133,8 +145,9 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrv %r2, 0(%r2) +; br %r14 function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -144,8 +157,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; br %r14 function %uload8_i32(i64) -> i32 { block0(v0: i64): @@ -153,8 +167,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function 
%sload8_i32(i64) -> i32 { block0(v0: i64): @@ -162,8 +177,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %uload16_i32(i64) -> i32 { block0(v0: i64): @@ -171,9 +187,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: llhr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; llhr %r2, %r5 +; br %r14 function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -183,9 +200,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: llhr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; llhr %r2, %r3 +; br %r14 function %sload16_i32(i64) -> i32 { block0(v0: i64): @@ -193,9 +211,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; lhr %r2, %r5 +; br %r14 function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -205,9 +224,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; lhr %r2, %r3 +; br %r14 function %load_i16(i64) -> i16 { block0(v0: i64): @@ -215,8 +235,9 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvh %r2, 0(%r2) +; br %r14 function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -226,8 +247,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; br %r14 function %uload8_i16(i64) -> i16 { block0(v0: i64): @@ -235,8 +257,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function %sload8_i16(i64) -> i16 { block0(v0: i64): @@ -244,8 +267,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %load_i8(i64) -> i8 { block0(v0: i64): @@ -253,6 +277,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/load.clif b/cranelift/filetests/filetests/isa/s390x/load.clif index 8d46fe0867..1d0a4a10c7 100644 --- a/cranelift/filetests/filetests/isa/s390x/load.clif +++ b/cranelift/filetests/filetests/isa/s390x/load.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lg %r2, 0(%r2) +; br %r14 function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: lgrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lgrl %r2, %sym + 0 +; br %r14 function %uload8_i64(i64) -> i64 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: llgc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgc %r2, 0(%r2) +; br %r14 function %sload8_i64(i64) -> i64 { block0(v0: i64): @@ -36,8 +39,9 @@ block0(v0: i64): return v1 } -; check: lgb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgb %r2, 0(%r2) +; br %r14 function %uload16_i64(i64) -> i64 { block0(v0: i64): @@ -45,8 +49,9 @@ block0(v0: i64): return v1 } -; check: llgh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgh %r2, 0(%r2) +; br %r14 function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -56,8 +61,9 @@ block0: return v1 } -; check: llghrl %r2, %sym + 0 -; 
nextln: br %r14 +; block0: +; llghrl %r2, %sym + 0 +; br %r14 function %sload16_i64(i64) -> i64 { block0(v0: i64): @@ -65,8 +71,9 @@ block0(v0: i64): return v1 } -; check: lgh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgh %r2, 0(%r2) +; br %r14 function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -76,8 +83,9 @@ block0: return v1 } -; check: lghrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lghrl %r2, %sym + 0 +; br %r14 function %uload32_i64(i64) -> i64 { block0(v0: i64): @@ -85,8 +93,9 @@ block0(v0: i64): return v1 } -; check: llgf %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgf %r2, 0(%r2) +; br %r14 function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -96,8 +105,9 @@ block0: return v1 } -; check: llgfrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llgfrl %r2, %sym + 0 +; br %r14 function %sload32_i64(i64) -> i64 { block0(v0: i64): @@ -105,8 +115,9 @@ block0(v0: i64): return v1 } -; check: lgf %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgf %r2, 0(%r2) +; br %r14 function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -116,8 +127,9 @@ block0: return v1 } -; check: lgfrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lgfrl %r2, %sym + 0 +; br %r14 function %load_i32(i64) -> i32 { block0(v0: i64): @@ -125,8 +137,9 @@ block0(v0: i64): return v1 } -; check: l %r2, 0(%r2) -; nextln: br %r14 +; block0: +; l %r2, 0(%r2) +; br %r14 function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -136,8 +149,9 @@ block0: return v1 } -; check: lrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lrl %r2, %sym + 0 +; br %r14 function %load_i32_off(i64) -> i32 { block0(v0: i64): @@ -145,8 +159,9 @@ block0(v0: i64): return v1 } -; check: ly %r2, 4096(%r2) -; nextln: br %r14 +; block0: +; ly %r2, 4096(%r2) +; br %r14 function %uload8_i32(i64) -> i32 { block0(v0: i64): @@ -154,8 +169,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function %sload8_i32(i64) -> i32 { block0(v0: i64): @@ -163,8 +179,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %uload16_i32(i64) -> i32 { block0(v0: i64): @@ -172,8 +189,9 @@ block0(v0: i64): return v1 } -; check: llh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llh %r2, 0(%r2) +; br %r14 function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -183,8 +201,9 @@ block0: return v1 } -; check: llhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llhrl %r2, %sym + 0 +; br %r14 function %sload16_i32(i64) -> i32 { block0(v0: i64): @@ -192,8 +211,9 @@ block0(v0: i64): return v1 } -; check: lh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lh %r2, 0(%r2) +; br %r14 function %sload16_i32_off(i64) -> i32 { block0(v0: i64): @@ -201,8 +221,9 @@ block0(v0: i64): return v1 } -; check: lhy %r2, 4096(%r2) -; nextln: br %r14 +; block0: +; lhy %r2, 4096(%r2) +; br %r14 function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -212,8 +233,9 @@ block0: return v1 } -; check: lhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lhrl %r2, %sym + 0 +; br %r14 function %load_i16(i64) -> i16 { block0(v0: i64): @@ -221,8 +243,9 @@ block0(v0: i64): return v1 } -; check: llh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llh %r2, 0(%r2) +; br %r14 function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -232,8 +255,9 @@ block0: return v1 } -; check: llhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llhrl %r2, %sym + 0 +; br %r14 function %uload8_i16(i64) -> i16 { block0(v0: 
i64): @@ -241,8 +265,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function %sload8_i16(i64) -> i16 { block0(v0: i64): @@ -250,8 +275,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %load_i8(i64) -> i8 { block0(v0: i64): @@ -259,6 +285,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif index d9197d3072..896f9a49d2 100644 --- a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif +++ b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;; Test default (non-SpiderMonkey) ABI. @@ -11,11 +11,12 @@ block1: return v0, v1, v2, v3 } -; check: lghi %r2, 1 -; nextln: lghi %r3, 2 -; nextln: lghi %r4, 3 -; nextln: lghi %r5, 4 -; nextln: br %r14 +; block0: +; lghi %r2, 1 +; lghi %r3, 2 +; lghi %r4, 3 +; lghi %r5, 4 +; br %r14 function %f1() -> i64, i64, i64, i64, i64, i64 { block1: @@ -28,20 +29,20 @@ block1: return v0, v1, v2, v3, v4, v5 } -; check: stmg %r12, %r15, 96(%r15) -; nextln: lgr %r14, %r2 -; nextln: lghi %r2, 1 -; nextln: lghi %r3, 2 -; nextln: lghi %r4, 3 -; nextln: lghi %r5, 4 -; nextln: lghi %r13, 5 -; nextln: lghi %r12, 6 -; nextln: stg %r13, 0(%r14) -; nextln: stg %r12, 8(%r14) -; nextln: lmg %r12, %r15, 96(%r15) -; nextln: br %r14 +; stmg %r6, %r15, 48(%r15) +; block0: +; lgr %r12, %r2 +; lghi %r2, 1 +; lghi %r3, 2 +; lghi %r4, 3 +; lghi %r5, 4 +; lghi %r10, 5 +; lghi %r6, 6 +; stg %r10, 0(%r12) +; stg %r6, 8(%r12) +; lmg %r6, %r15, 48(%r15) +; br %r14 -;; Test default (non-SpiderMonkey) ABI. 
function %f3() -> f64, f64, f64, f64 { block1: v0 = f64const 0x0.0 @@ -51,11 +52,12 @@ block1: return v0, v1, v2, v3 } -; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; br %r14 function %f4() -> f64, f64, f64, f64, f64, f64 { block1: @@ -68,12 +70,14 @@ block1: return v0, v1, v2, v3, v4, v5 } -; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 4 ; ld %f1, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 5 ; ld %f3, 0(%r1) -; nextln: std %f1, 0(%r2) -; nextln: std %f3, 8(%r2) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; bras %r1, 12 ; data.f64 4 ; ld %f5, 0(%r1) +; bras %r1, 12 ; data.f64 5 ; ld %f7, 0(%r1) +; std %f5, 0(%r2) +; std %f7, 8(%r2) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/reftypes.clif b/cranelift/filetests/filetests/isa/s390x/reftypes.clif index 0ffdcab20e..adb8c53f0c 100644 --- a/cranelift/filetests/filetests/isa/s390x/reftypes.clif +++ b/cranelift/filetests/filetests/isa/s390x/reftypes.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %f0(r64, r64) -> r64 { @@ -6,8 +6,9 @@ block0(v0: r64, v1: r64): return v1 } -; check: lgr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %f1(r64) -> b1 { block0(v0: r64): @@ -15,10 +16,11 @@ block0(v0: r64): return v1 } -; check: cghi %r2, 0 -; nextln: lhi %r2, 0 -; nextln: lochie %r2, 1 -; nextln: br %r14 +; block0: +; cghi %r2, 0 +; lhi %r2, 0 +; lochie %r2, 1 +; br %r14 function %f2(r64) -> b1 { block0(v0: r64): @@ -26,10 +28,11 @@ block0(v0: r64): return v1 } -; check: cghi %r2, -1 -; nextln: lhi %r2, 0 -; nextln: lochie %r2, 1 -; nextln: br %r14 +; block0: +; cghi %r2, -1 +; lhi %r2, 0 +; lochie %r2, 1 +; br %r14 function %f3() -> r64 { block0: @@ -37,8 +40,9 @@ block0: return v0 } -; check: lghi %r2, 0 -; nextln: br %r14 +; block0: +; lghi %r2, 0 +; br %r14 function %f4(r64, r64) -> r64, r64, r64 { fn0 = %f(r64) -> b1 @@ -61,41 +65,36 @@ block3(v7: r64, v8: r64): return v7, v8, v9 } -; check: Block 0: -; check: stmg %r12, %r15, 96(%r15) -; nextln: aghi %r15, -192 -; nextln: virtual_sp_offset_adjust 160 -; nextln: lgr %r13, %r2 -; nextln: lgr %r12, %r3 -; nextln: lgr %r2, %r13 -; nextln: bras %r1, 12 ; data %f + 0 ; lg %r3, 0(%r1) -; nextln: stg %r2, 168(%r15) -; nextln: stg %r13, 176(%r15) -; nextln: stg %r12, 184(%r15) -; nextln: (safepoint: slots [S0, S1, S2] -; nextln: basr %r14, %r3 -; nextln: lg %r13, 176(%r15) -; nextln: lg %r12, 184(%r15) -; nextln: la %r3, 160(%r15) -; nextln: stg %r13, 0(%r3) -; nextln: llcr %r2, %r2 -; nextln: chi %r2, 0 -; nextln: jgnlh label1 ; jg label3 -; check: Block 1: -; check: jg label2 -; check: Block 2: -; check: lgr %r2, %r12 -; nextln: jg label5 -; check: Block 3: -; check: jg label4 -; check: Block 4: -; check: lgr %r2, %r13 -; nextln: lgr %r13, %r12 -; nextln: jg label5 -; check: 
Block 5: -; check: la %r3, 160(%r15) -; nextln: lg %r3, 0(%r3) -; nextln: lgr %r4, %r3 -; nextln: lgr %r3, %r13 -; nextln: lmg %r12, %r15, 288(%r15) -; nextln: br %r14 +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -184 +; virtual_sp_offset_adjust 160 +; block0: +; lgr %r4, %r3 +; lgr %r3, %r2 +; bras %r1, 12 ; data %f + 0 ; lg %r5, 0(%r1) +; stg %r3, 168(%r15) +; stg %r4, 176(%r15) +; basr %r14, %r5 +; lg %r3, 168(%r15) +; la %r4, 160(%r15) +; stg %r3, 0(%r4) +; llcr %r4, %r2 +; chi %r4, 0 +; jgnlh label1 ; jg label3 +; block1: +; jg label2 +; block2: +; lg %r2, 176(%r15) +; jg label5 +; block3: +; jg label4 +; block4: +; lgr %r2, %r3 +; lg %r3, 176(%r15) +; jg label5 +; block5: +; la %r4, 160(%r15) +; lg %r4, 0(%r4) +; lmg %r14, %r15, 296(%r15) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif index 193a02eaad..21c328e4ce 100644 --- a/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif +++ b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif @@ -1,12 +1,16 @@ -test compile +test compile precise-output target s390x ; FIXME: not yet supported function %uaddsat64(i64, i64) -> i64 { block0(v0: i64, v1: i64): -; v2 = uadd_sat.i64 v0, v1 +;;SKIP v2 = uadd_sat.i64 v0, v1 v2 = iconst.i64 0 return v2 } +; block0: +; lghi %r2, 0 +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif index 0066bef498..552dfa62e8 100644 --- a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,9 +11,10 @@ block0(v0: i64, v1: i64): return v2 } -; check: lcgr %r3, %r3 -; nextln: rllg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lcgr %r3, %r3 +; rllg %r2, %r2, 0(%r3) +; br %r14 function %rotr_i64_imm(i64) -> i64 { block0(v0: i64): @@ -22,8 +23,9 @@ block0(v0: i64): return v2 } -; check: rllg %r2, %r2, 47 -; nextln: br %r14 +; block0: +; rllg %r2, %r2, 47 +; br %r14 function %rotr_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -31,9 +33,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: lcr %r3, %r3 -; nextln: rll %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lcr %r3, %r3 +; rll %r2, %r2, 0(%r3) +; br %r14 function %rotr_i32_imm(i32) -> i32 { block0(v0: i32): @@ -42,8 +45,9 @@ block0(v0: i32): return v2 } -; check: rll %r2, %r2, 15 -; nextln: br %r14 +; block0: +; rll %r2, %r2, 15 +; br %r14 function %rotr_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -51,14 +55,15 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 15 -; nextln: nill %r4, 15 -; nextln: sllk %r4, %r2, 0(%r4) -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: ork %r2, %r4, %r2 -; nextln: br %r14 +; block0: +; llhr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 15 +; nill %r5, 15 +; sllk %r5, %r4, 0(%r5) +; srlk %r3, %r4, 0(%r3) +; ork %r2, %r5, %r3 +; br %r14 function %rotr_i16_imm(i16) -> i16 { block0(v0: i16): @@ -67,11 +72,12 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: sllk %r3, %r2, 6 -; nextln: srlk %r2, %r2, 10 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; sllk %r3, %r5, 6 +; srlk %r5, %r5, 10 +; ork %r2, %r3, %r5 +; br %r14 function %rotr_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -79,14 +85,15 @@ block0(v0: i8, v1: i8): return 
v2 } -; check: llcr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 7 -; nextln: nill %r4, 7 -; nextln: sllk %r4, %r2, 0(%r4) -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: ork %r2, %r4, %r2 -; nextln: br %r14 +; block0: +; llcr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 7 +; nill %r5, 7 +; sllk %r5, %r4, 0(%r5) +; srlk %r3, %r4, 0(%r3) +; ork %r2, %r5, %r3 +; br %r14 function %rotr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -95,15 +102,12 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: sllk %r3, %r2, 5 -; nextln: srlk %r2, %r2, 3 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ROTL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r5, %r2 +; sllk %r3, %r5, 5 +; srlk %r5, %r5, 3 +; ork %r2, %r3, %r5 +; br %r14 function %rotl_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -111,8 +115,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: rllg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; rllg %r2, %r2, 0(%r3) +; br %r14 function %rotl_i64_imm(i64) -> i64 { block0(v0: i64): @@ -121,8 +126,9 @@ block0(v0: i64): return v2 } -; check: rllg %r2, %r2, 17 -; nextln: br %r14 +; block0: +; rllg %r2, %r2, 17 +; br %r14 function %rotl_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -130,8 +136,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: rll %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; rll %r2, %r2, 0(%r3) +; br %r14 function %rotl_i32_imm(i32) -> i32 { block0(v0: i32): @@ -140,8 +147,9 @@ block0(v0: i32): return v2 } -; check: rll %r2, %r2, 17 -; nextln: br %r14 +; block0: +; rll %r2, %r2, 17 +; br %r14 function %rotl_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -149,14 +157,15 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 15 -; nextln: nill %r4, 15 -; nextln: sllk %r3, %r2, 0(%r3) -; nextln: srlk %r2, %r2, 0(%r4) -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llhr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 15 +; nill %r5, 15 +; sllk %r2, %r4, 0(%r3) +; srlk %r3, %r4, 0(%r5) +; or %r2, %r3 +; br %r14 function %rotl_i16_imm(i16) -> i16 { block0(v0: i16): @@ -165,11 +174,12 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: sllk %r3, %r2, 10 -; nextln: srlk %r2, %r2, 6 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; sllk %r3, %r5, 10 +; srlk %r5, %r5, 6 +; ork %r2, %r3, %r5 +; br %r14 function %rotl_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -177,14 +187,15 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 7 -; nextln: nill %r4, 7 -; nextln: sllk %r3, %r2, 0(%r3) -; nextln: srlk %r2, %r2, 0(%r4) -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llcr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 7 +; nill %r5, 7 +; sllk %r2, %r4, 0(%r3) +; srlk %r3, %r4, 0(%r5) +; or %r2, %r3 +; br %r14 function %rotr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -193,15 +204,12 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: sllk %r3, %r2, 3 -; nextln: srlk %r2, %r2, 5 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; USHR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r5, %r2 +; sllk %r3, %r5, 3 +; srlk %r5, %r5, 5 +; ork %r2, %r3, %r5 +; br %r14 function %ushr_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -209,8 +217,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: srlg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; srlg 
%r2, %r2, 0(%r3) +; br %r14 function %ushr_i64_imm(i64) -> i64 { block0(v0: i64): @@ -219,8 +228,9 @@ block0(v0: i64): return v2 } -; check: srlg %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srlg %r2, %r2, 17 +; br %r14 function %ushr_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -228,8 +238,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: srlk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 31 +; srlk %r2, %r2, 0(%r3) +; br %r14 function %ushr_i32_imm(i32) -> i32 { block0(v0: i32): @@ -238,8 +250,9 @@ block0(v0: i32): return v2 } -; check: srlk %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srlk %r2, %r2, 17 +; br %r14 function %ushr_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -247,10 +260,11 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: nill %r3, 15 -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; llhr %r4, %r2 +; nill %r3, 15 +; srlk %r2, %r4, 0(%r3) +; br %r14 function %ushr_i16_imm(i16) -> i16 { block0(v0: i16): @@ -259,9 +273,10 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: srlk %r2, %r2, 10 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; srlk %r2, %r5, 10 +; br %r14 function %ushr_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -269,10 +284,11 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: nill %r3, 7 -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; llcr %r4, %r2 +; nill %r3, 7 +; srlk %r2, %r4, 0(%r3) +; br %r14 function %ushr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -281,13 +297,10 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: srlk %r2, %r2, 3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ISHL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r5, %r2 +; srlk %r2, %r5, 3 +; br %r14 function %ishl_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -295,8 +308,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: sllg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sllg %r2, %r2, 0(%r3) +; br %r14 function %ishl_i64_imm(i64) -> i64 { block0(v0: i64): @@ -305,8 +319,9 @@ block0(v0: i64): return v2 } -; check: sllg %r2, %r2, 17 -; nextln: br %r14 +; block0: +; sllg %r2, %r2, 17 +; br %r14 function %ishl_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -314,8 +329,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: sllk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 31 +; sllk %r2, %r2, 0(%r3) +; br %r14 function %ishl_i32_imm(i32) -> i32 { block0(v0: i32): @@ -324,8 +341,9 @@ block0(v0: i32): return v2 } -; check: sllk %r2, %r2, 17 -; nextln: br %r14 +; block0: +; sllk %r2, %r2, 17 +; br %r14 function %ishl_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -333,9 +351,10 @@ block0(v0: i16, v1: i16): return v2 } -; check: nill %r3, 15 -; nextln: sllk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 15 +; sllk %r2, %r2, 0(%r3) +; br %r14 function %ishl_i16_imm(i16) -> i16 { block0(v0: i16): @@ -344,8 +363,9 @@ block0(v0: i16): return v2 } -; check: sllk %r2, %r2, 10 -; nextln: br %r14 +; block0: +; sllk %r2, %r2, 10 +; br %r14 function %ishl_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -353,9 +373,10 @@ block0(v0: i8, v1: i8): return v2 } -; check: nill %r3, 7 -; nextln: sllk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 7 +; sllk %r2, %r2, 0(%r3) +; br %r14 function %ishl_i8_imm(i8) -> i8 { block0(v0: i8): @@ -364,12 +385,9 @@ block0(v0: i8): return v2 } -; check: sllk %r2, %r2, 3 -; nextln: br %r14 - 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SSHR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r2, %r2, 3 +; br %r14 function %sshr_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -377,8 +395,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: srag %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; srag %r2, %r2, 0(%r3) +; br %r14 function %sshr_i64_imm(i64) -> i64 { block0(v0: i64): @@ -387,8 +406,9 @@ block0(v0: i64): return v2 } -; check: srag %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srag %r2, %r2, 17 +; br %r14 function %sshr_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -396,8 +416,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: srak %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 31 +; srak %r2, %r2, 0(%r3) +; br %r14 function %sshr_i32_imm(i32) -> i32 { block0(v0: i32): @@ -406,8 +428,9 @@ block0(v0: i32): return v2 } -; check: srak %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srak %r2, %r2, 17 +; br %r14 function %sshr_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -415,10 +438,11 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: nill %r3, 15 -; nextln: srak %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lhr %r4, %r2 +; nill %r3, 15 +; srak %r2, %r4, 0(%r3) +; br %r14 function %sshr_i16_imm(i16) -> i16 { block0(v0: i16): @@ -427,9 +451,10 @@ block0(v0: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: srak %r2, %r2, 10 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; srak %r2, %r5, 10 +; br %r14 function %sshr_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -437,10 +462,11 @@ block0(v0: i8, v1: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: nill %r3, 7 -; nextln: srak %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lbr %r4, %r2 +; nill %r3, 7 +; srak %r2, %r4, 0(%r3) +; br %r14 function %sshr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -449,7 +475,8 @@ block0(v0: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: srak %r2, %r2, 3 -; nextln: br %r14 +; block0: +; lbr %r5, %r2 +; srak %r2, %r5, 3 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif index 0ef7320340..e2f802ab24 100644 --- a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %foo() { @@ -6,12 +6,16 @@ block0: return } +; block0: +; br %r14 + function %stack_limit_leaf_zero(i64 stack_limit) { block0(v0: i64): return } -; check: br %r14 +; block0: +; br %r14 function %stack_limit_gv_leaf_zero(i64 vmctx) { gv0 = vmctx @@ -22,8 +26,8 @@ block0(v0: i64): return } -; check: br %r14 - +; block0: +; br %r14 function %stack_limit_call_zero(i64 stack_limit) { fn0 = %foo() @@ -32,14 +36,15 @@ block0(v0: i64): return } -; check: clgrtle %r15, %r2 -; nextln: stmg %r14, %r15, 112(%r15) -; nextln: aghi %r15, -160 -; nextln: virtual_sp_offset_adjust 160 -; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) -; nextln: basr %r14, %r2 -; nextln: lmg %r14, %r15, 272(%r15) -; nextln: br %r14 +; clgrtle %r15, %r2 +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -160 +; virtual_sp_offset_adjust 160 +; block0: +; bras %r1, 12 ; data %foo + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 +; lmg %r14, %r15, 272(%r15) +; br %r14 function %stack_limit_gv_call_zero(i64 vmctx) { gv0 = vmctx @@ -52,16 +57,17 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: clgrtle %r15, %r1 -; 
nextln: stmg %r14, %r15, 112(%r15) -; nextln: aghi %r15, -160 -; nextln: virtual_sp_offset_adjust 160 -; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) -; nextln: basr %r14, %r2 -; nextln: lmg %r14, %r15, 272(%r15) -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; clgrtle %r15, %r1 +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -160 +; virtual_sp_offset_adjust 160 +; block0: +; bras %r1, 12 ; data %foo + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 +; lmg %r14, %r15, 272(%r15) +; br %r14 function %stack_limit(i64 stack_limit) { ss0 = explicit_slot 168 @@ -69,11 +75,12 @@ block0(v0: i64): return } -; check: la %r1, 168(%r2) -; nextln: clgrtle %r15, %r1 -; nextln: aghi %r15, -168 -; nextln: aghi %r15, 168 -; nextln: br %r14 +; la %r1, 168(%r2) +; clgrtle %r15, %r1 +; aghi %r15, -168 +; block0: +; aghi %r15, 168 +; br %r14 function %large_stack_limit(i64 stack_limit) { ss0 = explicit_slot 400000 @@ -81,12 +88,13 @@ block0(v0: i64): return } -; check: clgrtle %r15, %r2 -; nextln: lay %r1, 400000(%r2) -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -400000 -; nextln: agfi %r15, 400000 -; nextln: br %r14 +; clgrtle %r15, %r2 +; lay %r1, 400000(%r2) +; clgrtle %r15, %r1 +; agfi %r15, -400000 +; block0: +; agfi %r15, 400000 +; br %r14 function %huge_stack_limit(i64 stack_limit) { ss0 = explicit_slot 4000000 @@ -94,13 +102,14 @@ block0(v0: i64): return } -; check: clgrtle %r15, %r2 -; nextln: lgr %r1, %r2 -; nextln: algfi %r1, 4000000 -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -4000000 -; nextln: agfi %r15, 4000000 -; nextln: br %r14 +; clgrtle %r15, %r2 +; lgr %r1, %r2 +; algfi %r1, 4000000 +; clgrtle %r15, %r1 +; agfi %r15, -4000000 +; block0: +; agfi %r15, 4000000 +; br %r14 function %limit_preamble(i64 vmctx) { gv0 = vmctx @@ -112,13 +121,14 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: la %r1, 24(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: aghi %r15, -24 -; nextln: aghi %r15, 24 -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; la %r1, 24(%r1) +; clgrtle %r15, %r1 +; aghi %r15, -24 +; block0: +; aghi %r15, 24 +; br %r14 function %limit_preamble_large(i64 vmctx) { gv0 = vmctx @@ -130,14 +140,15 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: lay %r1, 400000(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -400000 -; nextln: agfi %r15, 400000 -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; clgrtle %r15, %r1 +; lay %r1, 400000(%r1) +; clgrtle %r15, %r1 +; agfi %r15, -400000 +; block0: +; agfi %r15, 400000 +; br %r14 function %limit_preamble_huge(i64 vmctx) { gv0 = vmctx @@ -149,14 +160,15 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: algfi %r1, 4000000 -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -4000000 -; nextln: agfi %r15, 4000000 -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; clgrtle %r15, %r1 +; algfi %r1, 4000000 +; clgrtle %r15, %r1 +; agfi %r15, -4000000 +; block0: +; agfi %r15, 4000000 +; br %r14 function %limit_preamble_huge_offset(i64 vmctx) { gv0 = vmctx @@ -167,9 +179,11 @@ block0(v0: i64): return } -; check: lgfi %r1, 1000000 ; lg %r1, 0(%r1,%r2) -; nextln: la %r1, 24(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: aghi %r15, -24 -; nextln: aghi %r15, 24 -; nextln: br %r14 +; lgfi %r1, 1000000 ; lg %r1, 0(%r1,%r2) +; la %r1, 24(%r1) +; clgrtle %r15, %r1 +; aghi %r15, -24 +; block0: +; aghi %r15, 24 +; br %r14 + diff --git 
a/cranelift/filetests/filetests/isa/s390x/stack.clif b/cranelift/filetests/filetests/isa/s390x/stack.clif index 1ac80b9fd0..d75edd6f88 100644 --- a/cranelift/filetests/filetests/isa/s390x/stack.clif +++ b/cranelift/filetests/filetests/isa/s390x/stack.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ; FIXME: Should allocate register save area. @@ -11,10 +11,11 @@ block0: return v0 } -; check: aghi %r15, -8 -; nextln: la %r2, 0(%r15) -; nextln: aghi %r15, 8 -; nextln: br %r14 +; aghi %r15, -8 +; block0: +; la %r2, 0(%r15) +; aghi %r15, 8 +; br %r14 function %stack_addr_big() -> i64 { ss0 = explicit_slot 100000 @@ -25,12 +26,11 @@ block0: return v0 } -; check: agfi %r15, -100008 -; nextln: la %r2, 0(%r15) -; nextln: agfi %r15, 100008 -; nextln: br %r14 - -; FIXME: don't use stack_addr legalization for stack_load and stack_store +; agfi %r15, -100008 +; block0: +; la %r2, 0(%r15) +; agfi %r15, 100008 +; br %r14 function %stack_load_small() -> i64 { ss0 = explicit_slot 8 @@ -40,11 +40,12 @@ block0: return v0 } -; check: aghi %r15, -8 -; nextln: la %r2, 0(%r15) -; nextln: lg %r2, 0(%r2) -; nextln: aghi %r15, 8 -; nextln: br %r14 +; aghi %r15, -8 +; block0: +; la %r4, 0(%r15) +; lg %r2, 0(%r4) +; aghi %r15, 8 +; br %r14 function %stack_load_big() -> i64 { ss0 = explicit_slot 100000 @@ -55,12 +56,12 @@ block0: return v0 } -; check: agfi %r15, -100008 -; nextln: la %r2, 0(%r15) -; nextln: lg %r2, 0(%r2) -; nextln: agfi %r15, 100008 -; nextln: br %r14 - +; agfi %r15, -100008 +; block0: +; la %r4, 0(%r15) +; lg %r2, 0(%r4) +; agfi %r15, 100008 +; br %r14 function %stack_store_small(i64) { ss0 = explicit_slot 8 @@ -70,11 +71,12 @@ block0(v0: i64): return } -; check: aghi %r15, -8 -; nextln: la %r3, 0(%r15) -; nextln: stg %r2, 0(%r3) -; nextln: aghi %r15, 8 -; nextln: br %r14 +; aghi %r15, -8 +; block0: +; la %r4, 0(%r15) +; stg %r2, 0(%r4) +; aghi %r15, 8 +; br %r14 function %stack_store_big(i64) { ss0 = explicit_slot 100000 @@ -85,9 +87,10 @@ block0(v0: i64): return } -; check: agfi %r15, -100008 -; nextln: la %r3, 0(%r15) -; nextln: stg %r2, 0(%r3) -; nextln: agfi %r15, 100008 -; nextln: br %r14 +; agfi %r15, -100008 +; block0: +; la %r4, 0(%r15) +; stg %r2, 0(%r4) +; agfi %r15, 100008 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/store-little.clif b/cranelift/filetests/filetests/isa/s390x/store-little.clif index 65c9ffab93..79b172ff72 100644 --- a/cranelift/filetests/filetests/isa/s390x/store-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/store-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %store_i64(i64, i64) { @@ -7,8 +7,9 @@ block0(v0: i64, v1: i64): return } -; check: strvg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvg %r2, 0(%r3) +; br %r14 function %store_i64_sym(i64) { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0(v0: i64): return } -; check: larl %r1, %sym + 0 ; strvg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvg %r2, 0(%r1) +; br %r14 function %store_imm_i64(i64) { block0(v0: i64): @@ -28,9 +30,10 @@ block0(v0: i64): return } -; check: lghi %r3, 12345 -; nextln: strvg %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lghi %r4, 12345 +; strvg %r4, 0(%r2) +; br %r14 function %istore8_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -38,8 +41,9 @@ block0(v0: i64, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i64(i64) { block0(v0: i64): @@ -48,8 +52,9 @@ block0(v0: i64): return } -; 
check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -57,8 +62,9 @@ block0(v0: i64, v1: i64): return } -; check: strvh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvh %r2, 0(%r3) +; br %r14 function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym @@ -68,8 +74,9 @@ block0(v0: i64): return } -; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; br %r14 function %istore16_imm_i64(i64) { block0(v0: i64): @@ -78,8 +85,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 14640 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 14640 +; br %r14 function %istore32_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -87,8 +95,9 @@ block0(v0: i64, v1: i64): return } -; check: strv %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strv %r2, 0(%r3) +; br %r14 function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym @@ -98,8 +107,9 @@ block0(v0: i64): return } -; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; br %r14 function %istore32_imm_i64(i64) { block0(v0: i64): @@ -108,9 +118,10 @@ block0(v0: i64): return } -; check: lghi %r3, 12345 -; nextln: strv %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lghi %r4, 12345 +; strv %r4, 0(%r2) +; br %r14 function %store_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -118,8 +129,9 @@ block0(v0: i32, v1: i64): return } -; check: strv %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strv %r2, 0(%r3) +; br %r14 function %store_i32_sym(i32) { gv0 = symbol colocated %sym @@ -129,8 +141,9 @@ block0(v0: i32): return } -; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; br %r14 function %store_imm_i32(i64) { block0(v0: i64): @@ -139,9 +152,10 @@ block0(v0: i64): return } -; check: lhi %r3, 12345 -; nextln: strv %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lhi %r4, 12345 +; strv %r4, 0(%r2) +; br %r14 function %istore8_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -149,8 +163,9 @@ block0(v0: i32, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i32(i64) { block0(v0: i64): @@ -159,8 +174,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -168,8 +184,9 @@ block0(v0: i32, v1: i64): return } -; check: strvh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvh %r2, 0(%r3) +; br %r14 function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym @@ -179,8 +196,9 @@ block0(v0: i32): return } -; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; br %r14 function %istore16_imm_i32(i64) { block0(v0: i64): @@ -189,8 +207,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 14640 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 14640 +; br %r14 function %store_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -198,8 +217,9 @@ block0(v0: i16, v1: i64): return } -; check: strvh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvh %r2, 0(%r3) +; br %r14 function %store_i16_sym(i16) { gv0 = symbol colocated %sym @@ -209,8 +229,9 @@ block0(v0: i16): return } -; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; br %r14 function %store_imm_i16(i64) { block0(v0: 
i64): @@ -219,8 +240,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 14640 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 14640 +; br %r14 function %istore8_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -228,8 +250,9 @@ block0(v0: i16, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i16(i64) { block0(v0: i64): @@ -238,8 +261,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_i8(i8, i64) { block0(v0: i8, v1: i64): @@ -247,8 +271,9 @@ block0(v0: i8, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %store_i8_off(i8, i64) { block0(v0: i8, v1: i64): @@ -256,8 +281,9 @@ block0(v0: i8, v1: i64): return } -; check: stcy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; stcy %r2, 4096(%r3) +; br %r14 function %store_imm_i8(i64) { block0(v0: i64): @@ -266,8 +292,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_imm_i8_off(i64) { block0(v0: i64): @@ -276,6 +303,7 @@ block0(v0: i64): return } -; check: mviy 4096(%r2), 123 -; nextln: br %r14 +; block0: +; mviy 4096(%r2), 123 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/store.clif b/cranelift/filetests/filetests/isa/s390x/store.clif index c7f896ae29..b0cea254e4 100644 --- a/cranelift/filetests/filetests/isa/s390x/store.clif +++ b/cranelift/filetests/filetests/isa/s390x/store.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %store_i64(i64, i64) { @@ -7,8 +7,9 @@ block0(v0: i64, v1: i64): return } -; check: stg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stg %r2, 0(%r3) +; br %r14 function %store_i64_sym(i64) { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0(v0: i64): return } -; check: stgrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; stgrl %r2, %sym + 0 +; br %r14 function %store_imm_i64(i64) { block0(v0: i64): @@ -28,8 +30,9 @@ block0(v0: i64): return } -; check: mvghi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvghi 0(%r2), 12345 +; br %r14 function %istore8_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -37,8 +40,9 @@ block0(v0: i64, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i64(i64) { block0(v0: i64): @@ -47,8 +51,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -56,8 +61,9 @@ block0(v0: i64, v1: i64): return } -; check: sth %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sth %r2, 0(%r3) +; br %r14 function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym @@ -67,8 +73,9 @@ block0(v0: i64): return } -; check: sthrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; sthrl %r2, %sym + 0 +; br %r14 function %istore16_imm_i64(i64) { block0(v0: i64): @@ -77,8 +84,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 12345 +; br %r14 function %istore32_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -86,8 +94,9 @@ block0(v0: i64, v1: i64): return } -; check: st %r2, 0(%r3) -; nextln: br %r14 +; block0: +; st %r2, 0(%r3) +; br %r14 function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym @@ -97,8 +106,9 @@ block0(v0: i64): return } -; check: strl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; strl %r2, %sym + 0 +; br 
%r14 function %istore32_imm_i64(i64) { block0(v0: i64): @@ -107,8 +117,9 @@ block0(v0: i64): return } -; check: mvhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhi 0(%r2), 12345 +; br %r14 function %store_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -116,8 +127,9 @@ block0(v0: i32, v1: i64): return } -; check: st %r2, 0(%r3) -; nextln: br %r14 +; block0: +; st %r2, 0(%r3) +; br %r14 function %store_i32_sym(i32) { gv0 = symbol colocated %sym @@ -127,8 +139,9 @@ block0(v0: i32): return } -; check: strl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; strl %r2, %sym + 0 +; br %r14 function %store_i32_off(i32, i64) { block0(v0: i32, v1: i64): @@ -136,8 +149,9 @@ block0(v0: i32, v1: i64): return } -; check: sty %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; sty %r2, 4096(%r3) +; br %r14 function %store_imm_i32(i64) { block0(v0: i64): @@ -146,8 +160,9 @@ block0(v0: i64): return } -; check: mvhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhi 0(%r2), 12345 +; br %r14 function %istore8_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -155,8 +170,9 @@ block0(v0: i32, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i32(i64) { block0(v0: i64): @@ -165,8 +181,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -174,8 +191,9 @@ block0(v0: i32, v1: i64): return } -; check: sth %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sth %r2, 0(%r3) +; br %r14 function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym @@ -185,8 +203,9 @@ block0(v0: i32): return } -; check: sthrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; sthrl %r2, %sym + 0 +; br %r14 function %istore16_imm_i32(i64) { block0(v0: i64): @@ -195,8 +214,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 12345 +; br %r14 function %store_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -204,8 +224,9 @@ block0(v0: i16, v1: i64): return } -; check: sth %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sth %r2, 0(%r3) +; br %r14 function %store_i16_sym(i16) { gv0 = symbol colocated %sym @@ -215,8 +236,9 @@ block0(v0: i16): return } -; check: sthrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; sthrl %r2, %sym + 0 +; br %r14 function %store_i16_off(i16, i64) { block0(v0: i16, v1: i64): @@ -224,8 +246,9 @@ block0(v0: i16, v1: i64): return } -; check: sthy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; sthy %r2, 4096(%r3) +; br %r14 function %store_imm_i16(i64) { block0(v0: i64): @@ -234,8 +257,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 12345 +; br %r14 function %istore8_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -243,8 +267,9 @@ block0(v0: i16, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i16(i64) { block0(v0: i64): @@ -253,8 +278,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_i8(i8, i64) { block0(v0: i8, v1: i64): @@ -262,8 +288,9 @@ block0(v0: i8, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %store_i8_off(i8, i64) { block0(v0: i8, v1: i64): @@ -271,8 +298,9 @@ block0(v0: i8, v1: i64): return } -; check: stcy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; stcy %r2, 4096(%r3) +; br %r14 function 
%store_imm_i8(i64) { block0(v0: i64): @@ -281,8 +309,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_imm_i8_off(i64) { block0(v0: i64): @@ -291,6 +320,7 @@ block0(v0: i64): return } -; check: mviy 4096(%r2), 123 -; nextln: br %r14 +; block0: +; mviy 4096(%r2), 123 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/symbols.clif b/cranelift/filetests/filetests/isa/s390x/symbols.clif index 98d0cc8e30..c995ea94eb 100644 --- a/cranelift/filetests/filetests/isa/s390x/symbols.clif +++ b/cranelift/filetests/filetests/isa/s390x/symbols.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -13,8 +13,9 @@ block0: return v0 } -; check: bras %r1, 12 ; data %my_global + 0 ; lg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data %my_global + 0 ; lg %r2, 0(%r1) +; br %r14 function %symbol_value_colocated() -> i64 { gv0 = symbol colocated %my_global_colo @@ -24,12 +25,9 @@ block0: return v0 } -; check: larl %r2, %my_global_colo + 0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FUNC_ADDR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; larl %r2, %my_global_colo + 0 +; br %r14 function %func_addr() -> i64 { fn0 = %my_func(i64) -> i64 @@ -39,8 +37,9 @@ block0: return v0 } -; check: bras %r1, 12 ; data %my_func + 0 ; lg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data %my_func + 0 ; lg %r2, 0(%r1) +; br %r14 function %func_addr_colocated() -> i64 { fn0 = colocated %my_func_colo(i64) -> i64 @@ -50,5 +49,7 @@ block0: return v0 } -; check: larl %r2, %my_func_colo + 0 -; nextln: br %r14 +; block0: +; larl %r2, %my_func_colo + 0 +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/traps.clif b/cranelift/filetests/filetests/isa/s390x/traps.clif index a6b70cecea..d6a191b3ea 100644 --- a/cranelift/filetests/filetests/isa/s390x/traps.clif +++ b/cranelift/filetests/filetests/isa/s390x/traps.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -10,18 +10,16 @@ block0: trap user0 } -; check: trap +; block0: +; trap function %resumable_trap() { block0: trap user0 } -; check: trap - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TRAPZ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; trap function %trapz(i64) { block0(v0: i64): @@ -31,18 +29,13 @@ block0(v0: i64): return } -; FIXME: Does not use TrapIf internally as trapz is expanded. -; check: Block 0 -; check: clgfi %r2, 42 -; nextln: jge label1 ; jg label2 -; check: Block 1: -; check: br %r14 -; check: Block 2: -; check: trap - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TRAPNZ/RESUMABLE_TRAPNZ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; clgfi %r2, 42 +; jge label1 ; jg label2 +; block1: +; br %r14 +; block2: +; trap function %trapnz(i64) { block0(v0: i64): @@ -52,14 +45,13 @@ block0(v0: i64): return } -; FIXME: Does not use TrapIf internally as trapnz is expanded. -; check: Block 0 -; check: clgfi %r2, 42 -; nextln: jgne label1 ; jg label2 -; check: Block 1: -; check: br %r14 -; check: Block 2: -; check: trap +; block0: +; clgfi %r2, 42 +; jgne label1 ; jg label2 +; block1: +; br %r14 +; block2: +; trap function %resumable_trapnz(i64) { block0(v0: i64): @@ -69,18 +61,13 @@ block0(v0: i64): return } -; FIXME: Does not use TrapIf internally as resumable_trapnz is expanded. 
-; check: Block 0 -; check: clgfi %r2, 42 -; nextln: jgne label1 ; jg label2 -; check: Block 1: -; check: br %r14 -; check: Block 2: -; check: trap - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; DEBUGTRAP -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; clgfi %r2, 42 +; jgne label1 ; jg label2 +; block1: +; br %r14 +; block2: +; trap function %h() { block0: @@ -88,4 +75,7 @@ block0: return } -; check: debugtrap +; block0: +; debugtrap +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index a8aa7966ac..eca1ac1499 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -8,19 +8,13 @@ block0(v0: i64, v1: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rdi,%rsi,1), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rdi,%rsi,1), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %amode_add_imm(i64) -> i64 { block0(v0: i64): @@ -30,19 +24,13 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 42(%rdi), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 42(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %amode_add_imm_order(i64) -> i64 { block0(v0: i64): @@ -52,19 +40,13 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 42(%rdi), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 42(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %amode_add_uext_imm(i64) -> i64 { block0(v0: i64): @@ -75,17 +57,11 @@ block0(v0: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 42(%rdi), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 42(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index 9d69d70e1c..eb971b36fa 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -7,21 +7,15 @@ block0(v0: b1, v1: i32, v2: i32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dil -; Inst 3: movl %edx, %edi -; Inst 4: cmovnzl %esi, %edi -; Inst 5: movq %rdi, %rax -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dil +; cmovnzl %esi, %edx, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(b1) -> i32 { block0(v0: b1): @@ -35,32 +29,21 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dil -; Inst 3: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dil +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(b1) -> i32 { block0(v0: b1): @@ -74,30 +57,19 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dil -; Inst 3: jz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dil +; jz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index 537f0ba993..ba779d4cf6 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -7,17 +7,12 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: addl %esi, %edi -; Inst 3: movq %rdi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addl %edi, %esi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/bextend.clif b/cranelift/filetests/filetests/isa/x64/bextend.clif index fca9d8efde..8c79762d56 100644 --- a/cranelift/filetests/filetests/isa/x64/bextend.clif +++ b/cranelift/filetests/filetests/isa/x64/bextend.clif @@ -7,17 +7,11 @@ block0(v0: b8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsbq %dil, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsbq %dil, %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index d0d7462366..0a6dbfacc8 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -16,32 +16,21 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpl %esi, %edi -; Inst 3: jz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpl %esi, %edi +; jz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -58,32 +47,21 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpl %esi, %edi -; Inst 3: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpl %esi, %edi +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -100,32 +78,21 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpl %esi, %edi -; Inst 3: jz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpl %esi, %edi +; jz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f3(f32, f32) -> i32 { block0(v0: f32, v1: f32): @@ -142,31 +109,20 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 2) -; (successor: Block 1) -; (instruction range: 0 .. 
5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: ucomiss %xmm1, %xmm0 -; Inst 3: jp label2 -; Inst 4: jnz label2; j label1 -; Block 1: -; (original IR block: block1) -; (instruction range: 5 .. 9) -; Inst 5: movl $1, %eax -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 9 .. 13) -; Inst 9: movl $2, %eax -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ucomiss %xmm1, %xmm0 +; jp label2 +; jnz label2; j label1 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index 56f16f9888..cfb2bd3009 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -9,23 +9,19 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $32, %rsp -; Inst 3: virtual_sp_offset_adjust 32 -; Inst 4: movq %rdi, %rcx -; Inst 5: call *%rdi -; Inst 6: addq $32, %rsp -; Inst 7: virtual_sp_offset_adjust -32 -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; subq %rsp, $32, %rsp +; virtual_sp_offset_adjust 32 +; movq %rdi, %rcx +; movq %rcx, %rdi +; call *%rdi +; addq %rsp, $32, %rsp +; virtual_sp_offset_adjust -32 +; movq %rbp, %rsp +; popq %rbp +; ret function %two_args(i32, f32) system_v { ;; system_v has params in %rdi, %xmm0, fastcall in %rcx, %xmm1 @@ -37,29 +33,26 @@ block0(v0: i32, v1: f32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 17) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movaps %xmm0, %xmm6 -; Inst 4: subq $32, %rsp -; Inst 5: virtual_sp_offset_adjust 32 -; Inst 6: movq %rsi, %rcx -; Inst 7: movaps %xmm6, %xmm1 -; Inst 8: call *%rsi -; Inst 9: addq $32, %rsp -; Inst 10: virtual_sp_offset_adjust -32 -; Inst 11: movq %rsi, %rdi -; Inst 12: movaps %xmm6, %xmm0 -; Inst 13: call *%rsi -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movdqa %xmm0, %xmm6 +; subq %rsp, $32, %rsp +; virtual_sp_offset_adjust 32 +; movq %rax, %rcx +; movdqa %xmm6, %xmm1 +; movq %rax, %rdi +; movdqa %xmm1, %xmm6 +; call *%rdi +; addq %rsp, $32, %rsp +; virtual_sp_offset_adjust -32 +; movq %rdi, %rax +; movdqa %xmm6, %xmm0 +; call *%rax +; movq %rbp, %rsp +; popq %rbp +; ret function %fastcall_to_systemv(i32) windows_fastcall { ;; fastcall preserves xmm6+, rbx, rbp, rdi, rsi, r12-r15 @@ -70,44 +63,39 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
32) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $176, %rsp -; Inst 3: movdqu %xmm6, 0(%rsp) -; Inst 4: movdqu %xmm7, 16(%rsp) -; Inst 5: movdqu %xmm8, 32(%rsp) -; Inst 6: movdqu %xmm9, 48(%rsp) -; Inst 7: movdqu %xmm10, 64(%rsp) -; Inst 8: movdqu %xmm11, 80(%rsp) -; Inst 9: movdqu %xmm12, 96(%rsp) -; Inst 10: movdqu %xmm13, 112(%rsp) -; Inst 11: movdqu %xmm14, 128(%rsp) -; Inst 12: movdqu %xmm15, 144(%rsp) -; Inst 13: movq %rsi, 160(%rsp) -; Inst 14: movq %rdi, 168(%rsp) -; Inst 15: call *%rcx -; Inst 16: movdqu 0(%rsp), %xmm6 -; Inst 17: movdqu 16(%rsp), %xmm7 -; Inst 18: movdqu 32(%rsp), %xmm8 -; Inst 19: movdqu 48(%rsp), %xmm9 -; Inst 20: movdqu 64(%rsp), %xmm10 -; Inst 21: movdqu 80(%rsp), %xmm11 -; Inst 22: movdqu 96(%rsp), %xmm12 -; Inst 23: movdqu 112(%rsp), %xmm13 -; Inst 24: movdqu 128(%rsp), %xmm14 -; Inst 25: movdqu 144(%rsp), %xmm15 -; Inst 26: movq 160(%rsp), %rsi -; Inst 27: movq 168(%rsp), %rdi -; Inst 28: addq $176, %rsp -; Inst 29: movq %rbp, %rsp -; Inst 30: popq %rbp -; Inst 31: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $176, %rsp +; movq %rsi, 0(%rsp) +; movq %rdi, 8(%rsp) +; movdqu %xmm6, 16(%rsp) +; movdqu %xmm7, 32(%rsp) +; movdqu %xmm8, 48(%rsp) +; movdqu %xmm9, 64(%rsp) +; movdqu %xmm10, 80(%rsp) +; movdqu %xmm11, 96(%rsp) +; movdqu %xmm12, 112(%rsp) +; movdqu %xmm13, 128(%rsp) +; movdqu %xmm14, 144(%rsp) +; movdqu %xmm15, 160(%rsp) +; block0: +; call *%rcx +; movq 0(%rsp), %rsi +; movq 8(%rsp), %rdi +; movdqu 16(%rsp), %xmm6 +; movdqu 32(%rsp), %xmm7 +; movdqu 48(%rsp), %xmm8 +; movdqu 64(%rsp), %xmm9 +; movdqu 80(%rsp), %xmm10 +; movdqu 96(%rsp), %xmm11 +; movdqu 112(%rsp), %xmm12 +; movdqu 128(%rsp), %xmm13 +; movdqu 144(%rsp), %xmm14 +; movdqu 160(%rsp), %xmm15 +; addq %rsp, $176, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %many_args( ;; rdi, rsi, rdx, rcx, r8, r9, @@ -139,56 +127,58 @@ block0( return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
44) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $32, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r14, 16(%rsp) -; Inst 6: movq %rdx, %rax -; Inst 7: movq %rcx, %r10 -; Inst 8: movq %r8, %r11 -; Inst 9: movq %r9, %r12 -; Inst 10: movq 16(%rbp), %r13 -; Inst 11: movq 24(%rbp), %r14 -; Inst 12: movss 32(%rbp), %xmm8 -; Inst 13: movsd 40(%rbp), %xmm9 -; Inst 14: subq $144, %rsp -; Inst 15: virtual_sp_offset_adjust 144 -; Inst 16: movq %rdi, %rcx -; Inst 17: movq %rsi, %rdx -; Inst 18: movq %rax, %r8 -; Inst 19: movq %r10, %r9 -; Inst 20: movq %r11, 32(%rsp) -; Inst 21: movq %r12, 40(%rsp) -; Inst 22: movsd %xmm0, 48(%rsp) -; Inst 23: movsd %xmm1, 56(%rsp) -; Inst 24: movsd %xmm2, 64(%rsp) -; Inst 25: movsd %xmm3, 72(%rsp) -; Inst 26: movsd %xmm4, 80(%rsp) -; Inst 27: movsd %xmm5, 88(%rsp) -; Inst 28: movsd %xmm6, 96(%rsp) -; Inst 29: movsd %xmm7, 104(%rsp) -; Inst 30: movq %r13, 112(%rsp) -; Inst 31: movl %r14d, 120(%rsp) -; Inst 32: movss %xmm8, 128(%rsp) -; Inst 33: movsd %xmm9, 136(%rsp) -; Inst 34: call *%rdi -; Inst 35: addq $144, %rsp -; Inst 36: virtual_sp_offset_adjust -144 -; Inst 37: movq 0(%rsp), %r12 -; Inst 38: movq 8(%rsp), %r13 -; Inst 39: movq 16(%rsp), %r14 -; Inst 40: addq $32, %rsp -; Inst 41: movq %rbp, %rsp -; Inst 42: popq %rbp -; Inst 43: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $96, %rsp +; movq %rbx, 48(%rsp) +; movq %r12, 56(%rsp) +; movq %r13, 64(%rsp) +; movq %r14, 72(%rsp) +; movq %r15, 80(%rsp) +; block0: +; movq %rsi, %rbx +; movq %rdx, %r14 +; movq %rcx, %r12 +; movq %r8, %r13 +; movq %r9, %r15 +; movq 16(%rbp), %rax +; movq 24(%rbp), %r11 +; movss 32(%rbp), %xmm9 +; movsd 40(%rbp), %xmm8 +; subq %rsp, $144, %rsp +; virtual_sp_offset_adjust 144 +; movq %rdi, %rcx +; movq %rbx, %rdx +; movq %r14, %r8 +; movq %r12, %r9 +; movq %r13, %rsi +; movq %rsi, 32(%rsp) +; movq %r15, %rsi +; movq %rsi, 40(%rsp) +; movsd %xmm0, 48(%rsp) +; movsd %xmm1, 56(%rsp) +; movsd %xmm2, 64(%rsp) +; movsd %xmm3, 72(%rsp) +; movsd %xmm4, 80(%rsp) +; movsd %xmm5, 88(%rsp) +; movsd %xmm6, 96(%rsp) +; movsd %xmm7, 104(%rsp) +; movq %rax, 112(%rsp) +; movl %r11d, 120(%rsp) +; movss %xmm9, 128(%rsp) +; movsd %xmm8, 136(%rsp) +; call *%rdi +; addq %rsp, $144, %rsp +; virtual_sp_offset_adjust -144 +; movq 48(%rsp), %rbx +; movq 56(%rsp), %r12 +; movq 64(%rsp), %r13 +; movq 72(%rsp), %r14 +; movq 80(%rsp), %r15 +; addq %rsp, $96, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %many_ints(i64, i64, i64, i64, i64) system_v { ;; rdi => rcx @@ -202,29 +192,31 @@ block0(v0: i64, v1:i64, v2:i64, v3:i64, v4:i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
17) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdx, %rax -; Inst 3: movq %rcx, %r9 -; Inst 4: movq %r8, %r10 -; Inst 5: subq $48, %rsp -; Inst 6: virtual_sp_offset_adjust 48 -; Inst 7: movq %rdi, %rcx -; Inst 8: movq %rsi, %rdx -; Inst 9: movq %rax, %r8 -; Inst 10: movq %r10, 32(%rsp) -; Inst 11: call *%rdi -; Inst 12: addq $48, %rsp -; Inst 13: virtual_sp_offset_adjust -48 -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rsi, %r11 +; movq %rdx, %r9 +; movq %rcx, %rax +; movq %r8, %rbx +; subq %rsp, $48, %rsp +; virtual_sp_offset_adjust 48 +; movq %rdi, %rcx +; movq %r11, %rdx +; movq %r9, %r8 +; movq %rax, %r9 +; movq %rbx, %r11 +; movq %r11, 32(%rsp) +; call *%rdi +; addq %rsp, $48, %rsp +; virtual_sp_offset_adjust -48 +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %many_args2(i32, f32, i64, f64, i32, i32, i32, f32, f64, f32, f64) system_v { sig0 = (i32, f32, i64, f64, i32, i32, i32, f32, f64, f32, f64) windows_fastcall @@ -233,37 +225,37 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 25) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movaps %xmm1, %xmm6 -; Inst 3: movq %rcx, %rax -; Inst 4: movq %r8, %r9 -; Inst 5: movaps %xmm3, %xmm7 -; Inst 6: subq $96, %rsp -; Inst 7: virtual_sp_offset_adjust 96 -; Inst 8: movq %rdi, %rcx -; Inst 9: movaps %xmm0, %xmm1 -; Inst 10: movq %rsi, %r8 -; Inst 11: movaps %xmm6, %xmm3 -; Inst 12: movl %edx, 32(%rsp) -; Inst 13: movl %eax, 40(%rsp) -; Inst 14: movl %r9d, 48(%rsp) -; Inst 15: movss %xmm2, 56(%rsp) -; Inst 16: movsd %xmm7, 64(%rsp) -; Inst 17: movss %xmm4, 72(%rsp) -; Inst 18: movsd %xmm5, 80(%rsp) -; Inst 19: call *%rdi -; Inst 20: addq $96, %rsp -; Inst 21: virtual_sp_offset_adjust -96 -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm6 +; movq %rsi, %rax +; movdqa %xmm1, %xmm14 +; movq %rcx, %r11 +; movq %r8, %r9 +; movdqa %xmm3, %xmm10 +; subq %rsp, $96, %rsp +; virtual_sp_offset_adjust 96 +; movq %rdi, %rcx +; movdqa %xmm6, %xmm1 +; movq %rax, %r8 +; movdqa %xmm14, %xmm3 +; movl %edx, 32(%rsp) +; movq %r11, %rdx +; movl %edx, 40(%rsp) +; movq %r9, %rax +; movl %eax, 48(%rsp) +; movss %xmm2, 56(%rsp) +; movdqa %xmm10, %xmm2 +; movsd %xmm2, 64(%rsp) +; movss %xmm4, 72(%rsp) +; movsd %xmm5, 80(%rsp) +; call *%rdi +; addq %rsp, $96, %rsp +; virtual_sp_offset_adjust -96 +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix1(i32) wasmtime_system_v { sig0 = (i32) system_v @@ -272,20 +264,14 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: call *%rsi -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; call *%rcx +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix2(i32) system_v { sig0 = (i32) wasmtime_system_v @@ -294,20 +280,14 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: call *%rsi -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; call *%rcx +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix2() -> i32, i32 system_v { sig0 = () -> i32, i32 wasmtime_system_v @@ -317,26 +297,20 @@ block0: return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 14) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $1, %esi -; Inst 3: subq $16, %rsp -; Inst 4: virtual_sp_offset_adjust 16 -; Inst 5: lea 0(%rsp), %rdi -; Inst 6: call *%rsi -; Inst 7: movq 0(%rsp), %rsi -; Inst 8: addq $16, %rsp -; Inst 9: virtual_sp_offset_adjust -16 -; Inst 10: movq %rsi, %rdx -; Inst 11: movq %rbp, %rsp -; Inst 12: popq %rbp -; Inst 13: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $1, %edx +; subq %rsp, $16, %rsp +; virtual_sp_offset_adjust 16 +; lea 0(%rsp), %rdi +; call *%rdx +; movq 0(%rsp), %rdx +; addq %rsp, $16, %rsp +; virtual_sp_offset_adjust -16 +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix3() -> i32, i32 wasmtime_system_v { sig0 = () -> i32, i32 system_v @@ -346,25 +320,21 @@ block0: return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: movl $1, %esi -; Inst 6: call *%rsi -; Inst 7: movl %edx, 0(%r12) -; Inst 8: movq 0(%rsp), %r12 -; Inst 9: addq $16, %rsp -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r13, 0(%rsp) +; block0: +; movq %rdi, %r13 +; movl $1, %r9d +; call *%r9 +; movq %r13, %rdi +; movl %edx, 0(%rdi) +; movq 0(%rsp), %r13 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix4() -> i32, i64, i32 wasmtime_system_v { sig0 = () -> i32, i64, i32 system_v @@ -374,32 +344,28 @@ block0: return v0, v1, v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
20) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: movl $1, %esi -; Inst 6: subq $16, %rsp -; Inst 7: virtual_sp_offset_adjust 16 -; Inst 8: lea 0(%rsp), %rdi -; Inst 9: call *%rsi -; Inst 10: movq 0(%rsp), %rsi -; Inst 11: addq $16, %rsp -; Inst 12: virtual_sp_offset_adjust -16 -; Inst 13: movq %rdx, 0(%r12) -; Inst 14: movl %esi, 8(%r12) -; Inst 15: movq 0(%rsp), %r12 -; Inst 16: addq $16, %rsp -; Inst 17: movq %rbp, %rsp -; Inst 18: popq %rbp -; Inst 19: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdi, %rbx +; movl $1, %esi +; subq %rsp, $16, %rsp +; virtual_sp_offset_adjust 16 +; lea 0(%rsp), %rdi +; call *%rsi +; movq 0(%rsp), %rcx +; addq %rsp, $16, %rsp +; virtual_sp_offset_adjust -16 +; movq %rbx, %rdi +; movq %rdx, 0(%rdi) +; movl %ecx, 8(%rdi) +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix5() -> f32, i64, i32, f32 wasmtime_system_v { sig0 = () -> f32, i64, i32, f32 system_v @@ -409,27 +375,23 @@ block0: return v0, v1, v2, v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: movl $1, %esi -; Inst 6: call *%rsi -; Inst 7: movq %rax, 0(%r12) -; Inst 8: movl %edx, 8(%r12) -; Inst 9: movss %xmm1, 12(%r12) -; Inst 10: movq 0(%rsp), %r12 -; Inst 11: addq $16, %rsp -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r12, 0(%rsp) +; block0: +; movq %rdi, %r12 +; movl $1, %eax +; call *%rax +; movq %r12, %rdi +; movq %rax, 0(%rdi) +; movl %edx, 8(%rdi) +; movss %xmm1, 12(%rdi) +; movq 0(%rsp), %r12 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix6(f32, i64, i32, f32) -> f32, i64, i32, f32 wasmtime_system_v { sig0 = (f32, i64, i32, f32) -> f32, i64, i32, f32 system_v @@ -439,25 +401,21 @@ block0(v0: f32, v1: i64, v2: i32, v3: f32): return v5, v6, v7, v8 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdx, %r12 -; Inst 5: movl $1, %eax -; Inst 6: call *%rax -; Inst 7: movq %rax, 0(%r12) -; Inst 8: movl %edx, 8(%r12) -; Inst 9: movss %xmm1, 12(%r12) -; Inst 10: movq 0(%rsp), %r12 -; Inst 11: addq $16, %rsp -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %rbx +; movl $1, %eax +; call *%rax +; movq %rbx, %rcx +; movq %rax, 0(%rcx) +; movl %edx, 8(%rcx) +; movss %xmm1, 12(%rcx) +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif index fcf828607f..38a42e95b4 100644 --- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif @@ -7,19 +7,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lzcntq %rdi, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lzcntq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %clz(i32) -> i32 { block0(v0: i32): @@ -27,17 +21,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lzcntl %edi, %esi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lzcntl %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index bb60205667..ffa1a37d60 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -10,25 +10,19 @@ block0(v0: i64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rsi), %rax -; Inst 3: cmpq %rax, %rdi -; Inst 4: setz %cl -; Inst 5: andq $1, %rcx -; Inst 6: cmpq %rax, %rdi -; Inst 7: cmovzq %rdi, %rsi -; Inst 8: movq %rcx, %rax -; Inst 9: movq %rsi, %rdx -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rsi), %rcx +; cmpq %rcx, %rdi +; setz %al +; andq %rax, $1, %rax +; cmpq %rcx, %rdi +; cmovzq %rdi, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(f64, i64) -> i64, f64 { block0(v0: f64, v1: i64): @@ -39,27 +33,20 @@ block0(v0: f64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 17) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsd 0(%rdi), %xmm1 -; Inst 3: ucomisd %xmm1, %xmm0 -; Inst 4: setnp %sil -; Inst 5: setz %dil -; Inst 6: andl %edi, %esi -; Inst 7: andq $1, %rsi -; Inst 8: ucomisd %xmm0, %xmm1 -; Inst 9: movaps %xmm0, %xmm1 -; Inst 10: jz $next; movsd %xmm0, %xmm1; $next: -; Inst 11: jnp $next; movsd %xmm0, %xmm1; $next: -; Inst 12: movq %rsi, %rax -; Inst 13: movaps %xmm1, %xmm0 -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsd 0(%rdi), %xmm12 +; ucomisd %xmm12, %xmm0 +; setnp %al +; setz %r8b +; andl %eax, %r8d, %eax +; andq %rax, $1, %rax +; ucomisd %xmm0, %xmm12 +; movdqa %xmm0, %xmm6 +; mov z, sd; j%xmm6 $next; mov%xmm0 %xmm0, %xmm0; $next: +; mov np, sd; j%xmm6 $next; mov%xmm0 %xmm0, %xmm0; $next: +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif index ab96df7607..e0ff8122bb 100644 --- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif +++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif @@ -7,19 +7,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: tzcntq %rdi, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; tzcntq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %ctz(i32) -> i32 { block0(v0: i32): @@ -27,17 +21,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: tzcntl %edi, %esi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; tzcntl %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index 5a366914a2..132f939818 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -12,8 +12,8 @@ block0(v0: i8, v1: i8): v2 = srem.i8 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %sil -; nextln: shrq $$8, %rax +; nextln: srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) +; nextln: shrq $$8, %rax, %rax return v2 } @@ -23,7 +23,7 @@ block0(v0: i16, v1: i16): v2 = srem.i16 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %si +; nextln: srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; nextln: movq %rdx, %rax return v2 @@ -34,7 +34,7 @@ block0(v0: i32, v1: i32): v2 = srem.i32 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %esi +; nextln: srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; nextln: movq %rdx, %rax return v2 @@ -45,7 +45,7 @@ block0(v0: i64, v1: i64): v2 = srem.i64 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %rsi +; nextln: srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; nextln: movq %rdx, %rax return v2 diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index 521a142618..8300606476 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -8,120 +8,90 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64): return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %rcx, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %rcx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %rdx, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %r8, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %r8, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f3(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %r9, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %r9, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f4(i64, i64, f64, i64) -> f64 windows_fastcall { block0(v0: i64, v1: i64, v2: f64, v3: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movaps %xmm2, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %f5(i64, i64, f64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: f64, v3: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %r9, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %r9, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64): @@ -138,59 +108,33 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64): ;; TODO(#2704): fix regalloc's register priority ordering! } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } -; Inst 4: subq $16, %rsp -; Inst 5: movq %rsi, 0(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J } -; Inst 7: movq 48(%rbp), %rsi -; Inst 8: movq 56(%rbp), %rsi -; Inst 9: movq %rsi, %rax -; Inst 10: movq 0(%rsp), %rsi -; Inst 11: addq $16, %rsp -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq 48(%rbp), %r11 +; movq 56(%rbp), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f7(i128, i64, i128, i128) -> i128 windows_fastcall { block0(v0: i128, v1: i64, v2: i128, v3: i128): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 20) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } -; Inst 4: subq $16, %rsp -; Inst 5: movq %rsi, 0(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J } -; Inst 7: movq %rdi, 8(%rsp) -; Inst 8: unwind SaveReg { clobber_offset: 8, reg: r17J } -; Inst 9: movq 48(%rbp), %rsi -; Inst 10: movq 56(%rbp), %rsi -; Inst 11: movq 64(%rbp), %rdi -; Inst 12: movq %rsi, %rax -; Inst 13: movq %rdi, %rdx -; Inst 14: movq 0(%rsp), %rsi -; Inst 15: movq 8(%rsp), %rdi -; Inst 16: addq $16, %rsp -; Inst 17: movq %rbp, %rsp -; Inst 18: popq %rbp -; Inst 19: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq 48(%rbp), %r11 +; movq 56(%rbp), %rax +; movq 64(%rbp), %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f8(i64) -> i64 windows_fastcall { sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall @@ -202,37 +146,26 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
25) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } -; Inst 4: subq $16, %rsp -; Inst 5: movq %rsi, 0(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J } -; Inst 7: movq %rcx, %rsi -; Inst 8: cvtsi2sd %rsi, %xmm3 -; Inst 9: subq $48, %rsp -; Inst 10: virtual_sp_offset_adjust 48 -; Inst 11: movq %rsi, %rcx -; Inst 12: movq %rsi, %rdx -; Inst 13: movaps %xmm3, %xmm2 -; Inst 14: movq %rsi, 32(%rsp) -; Inst 15: movq %rsi, 40(%rsp) -; Inst 16: load_ext_name %g+0, %rsi -; Inst 17: call *%rsi -; Inst 18: addq $48, %rsp -; Inst 19: virtual_sp_offset_adjust -48 -; Inst 20: movq 0(%rsp), %rsi -; Inst 21: addq $16, %rsp -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; cvtsi2sd %rcx, %xmm2 +; subq %rsp, $48, %rsp +; virtual_sp_offset_adjust 48 +; movq %rcx, %rdx +; movq %rdx, %r8 +; movdqa %xmm2, %xmm3 +; movq %r8, 32(%rsp) +; movq %r8, 40(%rsp) +; load_ext_name %g+0, %r9 +; call *%r9 +; addq %rsp, $48, %rsp +; virtual_sp_offset_adjust -48 +; movq %rbp, %rsp +; popq %rbp +; ret function %f9(i64) -> f64 windows_fastcall { block0(v0: i64): @@ -284,95 +217,91 @@ block0(v0: i64): return v39 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 85) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 } -; Inst 4: subq $224, %rsp -; Inst 5: movdqu %xmm6, 64(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r6V } -; Inst 7: movdqu %xmm7, 80(%rsp) -; Inst 8: unwind SaveReg { clobber_offset: 16, reg: r7V } -; Inst 9: movdqu %xmm8, 96(%rsp) -; Inst 10: unwind SaveReg { clobber_offset: 32, reg: r8V } -; Inst 11: movdqu %xmm9, 112(%rsp) -; Inst 12: unwind SaveReg { clobber_offset: 48, reg: r9V } -; Inst 13: movdqu %xmm10, 128(%rsp) -; Inst 14: unwind SaveReg { clobber_offset: 64, reg: r10V } -; Inst 15: movdqu %xmm11, 144(%rsp) -; Inst 16: unwind SaveReg { clobber_offset: 80, reg: r11V } -; Inst 17: movdqu %xmm12, 160(%rsp) -; Inst 18: unwind SaveReg { clobber_offset: 96, reg: r12V } -; Inst 19: movdqu %xmm13, 176(%rsp) -; Inst 20: unwind SaveReg { clobber_offset: 112, reg: r13V } -; Inst 21: movdqu %xmm14, 192(%rsp) -; Inst 22: unwind SaveReg { clobber_offset: 128, reg: r14V } -; Inst 23: movdqu %xmm15, 208(%rsp) -; Inst 24: unwind SaveReg { clobber_offset: 144, reg: r15V } -; Inst 25: movsd 0(%rcx), %xmm0 -; Inst 26: movdqu %xmm0, rsp(48 + virtual offset) -; Inst 27: movsd 8(%rcx), %xmm1 -; Inst 28: movsd 16(%rcx), %xmm0 -; Inst 29: movdqu %xmm0, rsp(32 + virtual offset) -; Inst 30: movsd 24(%rcx), %xmm3 -; Inst 31: movsd 32(%rcx), %xmm4 -; Inst 32: movsd 40(%rcx), %xmm5 -; Inst 33: movsd 48(%rcx), %xmm6 -; Inst 34: movsd 56(%rcx), %xmm7 -; Inst 35: movsd 64(%rcx), %xmm8 -; Inst 36: movsd 72(%rcx), %xmm9 -; Inst 37: movsd 80(%rcx), %xmm10 -; Inst 38: movsd 88(%rcx), %xmm11 -; Inst 39: movsd 96(%rcx), %xmm12 -; Inst 40: movsd 104(%rcx), %xmm13 -; Inst 41: movsd 112(%rcx), %xmm14 -; Inst 42: movsd 120(%rcx), %xmm15 -; Inst 43: movsd 128(%rcx), %xmm2 -; Inst 44: movdqu %xmm2, rsp(0 + virtual 
offset) -; Inst 45: movsd 136(%rcx), %xmm2 -; Inst 46: movsd 144(%rcx), %xmm0 -; Inst 47: movdqu %xmm0, rsp(16 + virtual offset) -; Inst 48: movdqu rsp(48 + virtual offset), %xmm0 -; Inst 49: addsd %xmm1, %xmm0 -; Inst 50: movdqu rsp(32 + virtual offset), %xmm1 -; Inst 51: addsd %xmm3, %xmm1 -; Inst 52: addsd %xmm5, %xmm4 -; Inst 53: addsd %xmm7, %xmm6 -; Inst 54: addsd %xmm9, %xmm8 -; Inst 55: addsd %xmm11, %xmm10 -; Inst 56: addsd %xmm13, %xmm12 -; Inst 57: addsd %xmm15, %xmm14 -; Inst 58: movdqu rsp(0 + virtual offset), %xmm3 -; Inst 59: addsd %xmm2, %xmm3 -; Inst 60: movdqu rsp(16 + virtual offset), %xmm2 -; Inst 61: addsd 152(%rcx), %xmm2 -; Inst 62: addsd %xmm1, %xmm0 -; Inst 63: addsd %xmm6, %xmm4 -; Inst 64: addsd %xmm10, %xmm8 -; Inst 65: addsd %xmm14, %xmm12 -; Inst 66: addsd %xmm2, %xmm3 -; Inst 67: addsd %xmm4, %xmm0 -; Inst 68: addsd %xmm12, %xmm8 -; Inst 69: addsd %xmm8, %xmm0 -; Inst 70: addsd %xmm3, %xmm0 -; Inst 71: movdqu 64(%rsp), %xmm6 -; Inst 72: movdqu 80(%rsp), %xmm7 -; Inst 73: movdqu 96(%rsp), %xmm8 -; Inst 74: movdqu 112(%rsp), %xmm9 -; Inst 75: movdqu 128(%rsp), %xmm10 -; Inst 76: movdqu 144(%rsp), %xmm11 -; Inst 77: movdqu 160(%rsp), %xmm12 -; Inst 78: movdqu 176(%rsp), %xmm13 -; Inst 79: movdqu 192(%rsp), %xmm14 -; Inst 80: movdqu 208(%rsp), %xmm15 -; Inst 81: addq $224, %rsp -; Inst 82: movq %rbp, %rsp -; Inst 83: popq %rbp -; Inst 84: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 144 } +; subq %rsp, $240, %rsp +; movdqu %xmm6, 96(%rsp) +; unwind SaveReg { clobber_offset: 0, reg: p6f } +; movdqu %xmm7, 112(%rsp) +; unwind SaveReg { clobber_offset: 16, reg: p7f } +; movdqu %xmm8, 128(%rsp) +; unwind SaveReg { clobber_offset: 32, reg: p8f } +; movdqu %xmm9, 144(%rsp) +; unwind SaveReg { clobber_offset: 48, reg: p9f } +; movdqu %xmm10, 160(%rsp) +; unwind SaveReg { clobber_offset: 64, reg: p10f } +; movdqu %xmm11, 176(%rsp) +; unwind SaveReg { clobber_offset: 80, reg: p11f } +; movdqu %xmm12, 192(%rsp) +; unwind SaveReg { clobber_offset: 96, reg: p12f } +; movdqu %xmm13, 208(%rsp) +; unwind SaveReg { clobber_offset: 112, reg: p13f } +; movdqu %xmm14, 224(%rsp) +; unwind SaveReg { clobber_offset: 128, reg: p14f } +; block0: +; movsd 0(%rcx), %xmm0 +; movsd 8(%rcx), %xmm12 +; movdqu %xmm12, rsp(80 + virtual offset) +; movsd 16(%rcx), %xmm6 +; movdqu %xmm6, rsp(0 + virtual offset) +; movsd 24(%rcx), %xmm2 +; movdqu %xmm2, rsp(64 + virtual offset) +; movsd 32(%rcx), %xmm14 +; movsd 40(%rcx), %xmm3 +; movdqu %xmm3, rsp(48 + virtual offset) +; movsd 48(%rcx), %xmm9 +; movsd 56(%rcx), %xmm7 +; movdqu %xmm7, rsp(32 + virtual offset) +; movsd 64(%rcx), %xmm13 +; movsd 72(%rcx), %xmm10 +; movdqu %xmm10, rsp(16 + virtual offset) +; movsd 80(%rcx), %xmm11 +; movsd 88(%rcx), %xmm10 +; movsd 96(%rcx), %xmm5 +; movsd 104(%rcx), %xmm12 +; movsd 112(%rcx), %xmm1 +; movsd 120(%rcx), %xmm2 +; movsd 128(%rcx), %xmm8 +; movsd 136(%rcx), %xmm3 +; movsd 144(%rcx), %xmm4 +; movdqu rsp(80 + virtual offset), %xmm6 +; addsd %xmm0, %xmm0, %xmm6 +; movdqu rsp(0 + virtual offset), %xmm6 +; movdqu rsp(64 + virtual offset), %xmm7 +; addsd %xmm6, %xmm6, %xmm7 +; movdqu rsp(48 + virtual offset), %xmm7 +; addsd %xmm14, %xmm14, %xmm7 +; movdqu rsp(32 + virtual offset), %xmm7 +; addsd %xmm9, %xmm9, %xmm7 +; movdqu rsp(16 + virtual offset), %xmm7 +; addsd %xmm13, %xmm13, %xmm7 +; addsd %xmm11, %xmm11, %xmm10 +; addsd %xmm5, %xmm5, %xmm12 +; addsd %xmm1, 
%xmm1, %xmm2 +; addsd %xmm8, %xmm8, %xmm3 +; addsd %xmm4, 152(%xmm4), %rcx +; addsd %xmm0, %xmm0, %xmm6 +; addsd %xmm14, %xmm14, %xmm9 +; addsd %xmm13, %xmm13, %xmm11 +; addsd %xmm5, %xmm5, %xmm1 +; addsd %xmm8, %xmm8, %xmm4 +; addsd %xmm0, %xmm0, %xmm14 +; addsd %xmm13, %xmm13, %xmm5 +; addsd %xmm0, %xmm0, %xmm13 +; addsd %xmm0, %xmm0, %xmm8 +; movdqu 96(%rsp), %xmm6 +; movdqu 112(%rsp), %xmm7 +; movdqu 128(%rsp), %xmm8 +; movdqu 144(%rsp), %xmm9 +; movdqu 160(%rsp), %xmm10 +; movdqu 176(%rsp), %xmm11 +; movdqu 192(%rsp), %xmm12 +; movdqu 208(%rsp), %xmm13 +; movdqu 224(%rsp), %xmm14 +; addq %rsp, $240, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif index eaf9760328..153904de83 100644 --- a/cranelift/filetests/filetests/isa/x64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif @@ -7,21 +7,17 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movabsq $9223372036854775807, %rsi -; Inst 3: movq %rsi, %xmm1 -; Inst 4: andpd %xmm0, %xmm1 -; Inst 5: movaps %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm5 +; movabsq $9223372036854775807, %rdx +; movq %rdx, %xmm0 +; movdqa %xmm5, %xmm7 +; andpd %xmm0, %xmm0, %xmm7 +; movq %rbp, %rsp +; popq %rbp +; ret function %f(i64) -> f64 { block0(v0: i64): @@ -30,20 +26,14 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsd 0(%rdi), %xmm0 -; Inst 3: movabsq $9223372036854775807, %rsi -; Inst 4: movq %rsi, %xmm1 -; Inst 5: andpd %xmm0, %xmm1 -; Inst 6: movaps %xmm1, %xmm0 -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsd 0(%rdi), %xmm5 +; movabsq $9223372036854775807, %r8 +; movq %r8, %xmm0 +; andpd %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index c7f3664750..65fd524663 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -13,36 +13,24 @@ block0(v0: i32, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl %edi, %ecx -; Inst 3: movq 8(%rsi), %rdi -; Inst 4: movq %rcx, %rax -; Inst 5: addq $32768, %rax -; Inst 6: jnb ; ud2 heap_oob ; -; Inst 7: cmpq %rdi, %rax -; Inst 8: jbe label1; j label2 -; Block 1: -; (original IR block: block2) -; (instruction range: 9 .. 17) -; Inst 9: addq 0(%rsi), %rcx -; Inst 10: xorq %rsi, %rsi -; Inst 11: cmpq %rdi, %rax -; Inst 12: cmovnbeq %rsi, %rcx -; Inst 13: movq %rcx, %rax -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; Block 2: -; (original IR block: block1) -; (instruction range: 17 .. 
18) -; Inst 17: ud2 heap_oob -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %edi, %eax +; movq 8(%rsi), %rcx +; movq %rax, %rdx +; addq %rdx, $32768, %rdx +; jnb ; ud2 heap_oob ; +; cmpq %rcx, %rdx +; jbe label1; j label2 +; block1: +; addq %rax, 0(%rsi), %rax +; xorq %r8, %r8, %r8 +; cmpq %rcx, %rdx +; cmovnbeq %r8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; ud2 heap_oob diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 01bb461730..51cc8129b6 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -8,21 +8,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: addq %rdx, %rdi -; Inst 3: adcq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addq %rdi, %rdx, %rdi +; adcq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -30,21 +25,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq %rdx, %rdi -; Inst 3: sbbq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; subq %rdi, %rdx, %rdi +; sbbq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -52,21 +42,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: andq %rdx, %rdi -; Inst 3: andq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andq %rdi, %rdx, %rdi +; andq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f3(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -74,21 +59,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: orq %rdx, %rdi -; Inst 3: orq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; orq %rdi, %rdx, %rdi +; orq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f4(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -96,21 +76,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: xorq %rdx, %rdi -; Inst 3: xorq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorq %rdi, %rdx, %rdi +; xorq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f5(i128) -> i128 { block0(v0: i128): @@ -118,24 +93,16 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdi, %rsi -; Inst 4: notq %rsi -; Inst 5: movq %rax, %rdi -; Inst 6: notq %rdi -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rdi, %rdx -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; notq %rdi, %rdi +; notq %rsi, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f6(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -143,26 +110,24 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 14) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdi, %rsi -; Inst 4: imulq %rcx, %rsi -; Inst 5: imulq %rdx, %rax -; Inst 6: addq %rax, %rsi -; Inst 7: movq %rdi, %rax -; Inst 8: mul %rdx -; Inst 9: addq %rdx, %rsi -; Inst 10: movq %rsi, %rdx -; Inst 11: movq %rbp, %rsp -; Inst 12: popq %rbp -; Inst 13: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %r11 +; imulq %r11, %rcx, %r11 +; movq %rdi, %rax +; imulq %rsi, %rdx, %rsi +; movq %r11, %rdi +; addq %rdi, %rsi, %rdi +; movq %rdi, %r11 +; mul %rax, %rdx, %rax, %rdx +; movq %r11, %r8 +; addq %r8, %rdx, %r8 +; movq %r8, %r11 +; movq %r11, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f7(i64, i64) -> i128 { block0(v0: i64, v1: i64): @@ -170,19 +135,14 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdx -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f8(i128) -> i64, i64 { block0(v0: i128): @@ -190,19 +150,14 @@ block0(v0: i128): return v1, v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdx -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f9(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -228,126 +183,123 @@ block0(v0: i128, v1: i128): return v20 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
114) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $64, %rsp -; Inst 3: movq %r12, 16(%rsp) -; Inst 4: movq %r13, 24(%rsp) -; Inst 5: movq %r14, 32(%rsp) -; Inst 6: movq %rbx, 40(%rsp) -; Inst 7: movq %r15, 48(%rsp) -; Inst 8: cmpq %rdx, %rdi -; Inst 9: setz %al -; Inst 10: cmpq %rcx, %rsi -; Inst 11: setz %r8b -; Inst 12: andq %r8, %rax -; Inst 13: testq $1, %rax -; Inst 14: setnz %al -; Inst 15: movq %rax, rsp(0 + virtual offset) -; Inst 16: cmpq %rdx, %rdi -; Inst 17: setnz %al -; Inst 18: cmpq %rcx, %rsi -; Inst 19: setnz %r8b -; Inst 20: orq %r8, %rax -; Inst 21: testq $1, %rax -; Inst 22: setnz %r8b -; Inst 23: cmpq %rcx, %rsi -; Inst 24: setl %r9b -; Inst 25: setz %al -; Inst 26: cmpq %rdx, %rdi -; Inst 27: setb %r10b -; Inst 28: andq %rax, %r10 -; Inst 29: orq %r9, %r10 -; Inst 30: andq $1, %r10 -; Inst 31: setnz %r9b -; Inst 32: cmpq %rcx, %rsi -; Inst 33: setl %r10b -; Inst 34: setz %al -; Inst 35: cmpq %rdx, %rdi -; Inst 36: setbe %r11b -; Inst 37: andq %rax, %r11 -; Inst 38: orq %r10, %r11 -; Inst 39: andq $1, %r11 -; Inst 40: setnz %r10b -; Inst 41: cmpq %rcx, %rsi -; Inst 42: setnle %r11b -; Inst 43: setz %al -; Inst 44: cmpq %rdx, %rdi -; Inst 45: setnbe %r12b -; Inst 46: andq %rax, %r12 -; Inst 47: orq %r11, %r12 -; Inst 48: andq $1, %r12 -; Inst 49: setnz %r11b -; Inst 50: cmpq %rcx, %rsi -; Inst 51: setnle %r12b -; Inst 52: setz %al -; Inst 53: cmpq %rdx, %rdi -; Inst 54: setnb %r13b -; Inst 55: andq %rax, %r13 -; Inst 56: orq %r12, %r13 -; Inst 57: andq $1, %r13 -; Inst 58: setnz %r12b -; Inst 59: cmpq %rcx, %rsi -; Inst 60: setb %r13b -; Inst 61: setz %al -; Inst 62: cmpq %rdx, %rdi -; Inst 63: setb %r14b -; Inst 64: andq %rax, %r14 -; Inst 65: orq %r13, %r14 -; Inst 66: andq $1, %r14 -; Inst 67: setnz %r13b -; Inst 68: cmpq %rcx, %rsi -; Inst 69: setb %r14b -; Inst 70: setz %al -; Inst 71: cmpq %rdx, %rdi -; Inst 72: setbe %bl -; Inst 73: andq %rax, %rbx -; Inst 74: orq %r14, %rbx -; Inst 75: andq $1, %rbx -; Inst 76: setnz %r14b -; Inst 77: cmpq %rcx, %rsi -; Inst 78: setnbe %bl -; Inst 79: setz %r15b -; Inst 80: cmpq %rdx, %rdi -; Inst 81: setnbe %al -; Inst 82: andq %r15, %rax -; Inst 83: orq %rbx, %rax -; Inst 84: andq $1, %rax -; Inst 85: setnz %bl -; Inst 86: cmpq %rcx, %rsi -; Inst 87: setnbe %sil -; Inst 88: setz %cl -; Inst 89: cmpq %rdx, %rdi -; Inst 90: setnb %dil -; Inst 91: andq %rcx, %rdi -; Inst 92: orq %rsi, %rdi -; Inst 93: andq $1, %rdi -; Inst 94: setnz %sil -; Inst 95: movq rsp(0 + virtual offset), %rax -; Inst 96: andl %r8d, %eax -; Inst 97: andl %r10d, %r9d -; Inst 98: andl %r12d, %r11d -; Inst 99: andl %r14d, %r13d -; Inst 100: andl %esi, %ebx -; Inst 101: andl %r9d, %eax -; Inst 102: andl %r13d, %r11d -; Inst 103: andl %r11d, %eax -; Inst 104: andl %ebx, %eax -; Inst 105: movq 16(%rsp), %r12 -; Inst 106: movq 24(%rsp), %r13 -; Inst 107: movq 32(%rsp), %r14 -; Inst 108: movq 40(%rsp), %rbx -; Inst 109: movq 48(%rsp), %r15 -; Inst 110: addq $64, %rsp -; Inst 111: movq %rbp, %rsp -; Inst 112: popq %rbp -; Inst 113: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $64, %rsp +; movq %rbx, 16(%rsp) +; movq %r12, 24(%rsp) +; movq %r13, 32(%rsp) +; movq %r14, 40(%rsp) +; movq %r15, 48(%rsp) +; block0: +; cmpq %rdx, %rdi +; setz %al +; cmpq %rcx, %rsi +; setz %r8b +; andq %rax, %r8, %rax +; testq $1, %rax +; setnz %al +; cmpq %rdx, %rdi +; setnz %r8b +; cmpq %rcx, %rsi +; setnz %r9b +; orq %r8, %r9, %r8 +; testq $1, %r8 +; setnz %r8b +; movq %r8, rsp(8 + virtual offset) +; cmpq %rcx, %rsi +; setl %r9b +; setz %r15b +; cmpq 
%rdx, %rdi +; setb %r11b +; andq %r11, %r15, %r11 +; orq %r11, %r9, %r11 +; andq %r11, $1, %r11 +; setnz %r9b +; cmpq %rcx, %rsi +; setl %r11b +; setz %bl +; cmpq %rdx, %rdi +; setbe %r14b +; andq %r14, %rbx, %r14 +; orq %r14, %r11, %r14 +; andq %r14, $1, %r14 +; setnz %r8b +; movq %r8, rsp(0 + virtual offset) +; cmpq %rcx, %rsi +; setnle %r15b +; setz %r12b +; cmpq %rdx, %rdi +; setnbe %bl +; andq %rbx, %r12, %rbx +; orq %rbx, %r15, %rbx +; andq %rbx, $1, %rbx +; setnz %r15b +; cmpq %rcx, %rsi +; setnle %bl +; setz %r13b +; cmpq %rdx, %rdi +; setnb %r12b +; andq %r12, %r13, %r12 +; orq %r12, %rbx, %r12 +; andq %r12, $1, %r12 +; setnz %bl +; cmpq %rcx, %rsi +; setb %r12b +; setz %r14b +; cmpq %rdx, %rdi +; setb %r13b +; andq %r13, %r14, %r13 +; orq %r13, %r12, %r13 +; andq %r13, $1, %r13 +; setnz %r12b +; cmpq %rcx, %rsi +; setb %r13b +; setz %r8b +; cmpq %rdx, %rdi +; setbe %r14b +; andq %r14, %r8, %r14 +; orq %r14, %r13, %r14 +; andq %r14, $1, %r14 +; setnz %r13b +; cmpq %rcx, %rsi +; setnbe %r14b +; setz %r11b +; cmpq %rdx, %rdi +; setnbe %r8b +; andq %r8, %r11, %r8 +; orq %r8, %r14, %r8 +; andq %r8, $1, %r8 +; setnz %r14b +; cmpq %rcx, %rsi +; setnbe %sil +; setz %cl +; cmpq %rdx, %rdi +; setnb %r8b +; andq %r8, %rcx, %r8 +; orq %r8, %rsi, %r8 +; andq %r8, $1, %r8 +; setnz %dl +; movq rsp(8 + virtual offset), %r11 +; andl %eax, %r11d, %eax +; movq rsp(0 + virtual offset), %rdi +; andl %r9d, %edi, %r9d +; andl %r15d, %ebx, %r15d +; andl %r12d, %r13d, %r12d +; andl %r14d, %edx, %r14d +; andl %eax, %r9d, %eax +; andl %r15d, %r12d, %r15d +; andl %eax, %r15d, %eax +; andl %eax, %r14d, %eax +; movq 16(%rsp), %rbx +; movq 24(%rsp), %r12 +; movq 32(%rsp), %r13 +; movq 40(%rsp), %r14 +; movq 48(%rsp), %r15 +; addq %rsp, $64, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f10(i128) -> i32 { block0(v0: i128): @@ -363,36 +315,25 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq $0, %rdi -; Inst 3: setz %dil -; Inst 4: cmpq $0, %rsi -; Inst 5: setz %sil -; Inst 6: andb %dil, %sil -; Inst 7: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 8 .. 12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 12 .. 16) -; Inst 12: movl $2, %eax -; Inst 13: movq %rbp, %rsp -; Inst 14: popq %rbp -; Inst 15: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpq $0, %rdi +; setz %dil +; cmpq $0, %rsi +; setz %cl +; andb %cl, %dil, %cl +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f11(i128) -> i32 { block0(v0: i128): @@ -408,36 +349,25 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq $0, %rdi -; Inst 3: setnz %dil -; Inst 4: cmpq $0, %rsi -; Inst 5: setnz %sil -; Inst 6: orb %dil, %sil -; Inst 7: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 8 .. 
12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 12 .. 16) -; Inst 12: movl $2, %eax -; Inst 13: movq %rbp, %rsp -; Inst 14: popq %rbp -; Inst 15: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpq $0, %rdi +; setnz %dil +; cmpq $0, %rsi +; setnz %cl +; orb %cl, %dil, %cl +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f12(i64) -> i128 { block0(v0: i64): @@ -445,21 +375,14 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: xorq %rdi, %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorq %rdx, %rdx, %rdx +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f13(i64) -> i128 { block0(v0: i64): @@ -467,22 +390,15 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: sarq $63, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rdx +; sarq $63, %rdx, %rdx +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f14(i8) -> i128 { block0(v0: i8): @@ -490,22 +406,15 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsbq %dil, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: sarq $63, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsbq %dil, %rax +; movq %rax, %rdx +; sarq $63, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f15(i8) -> i128 { block0(v0: i8): @@ -513,21 +422,14 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movzbq %dil, %rsi -; Inst 3: xorq %rdi, %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbq %dil, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f16(i128) -> i64 { block0(v0: i128): @@ -535,18 +437,13 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f17(i128) -> i8 { block0(v0: i128): @@ -554,18 +451,13 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f18(b1) -> i128 { block0(v0: b1): @@ -573,22 +465,15 @@ block0(v0: b1): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: andq $1, %rsi -; Inst 4: xorq %rdi, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andq %rdi, $1, %rdi +; xorq %rdx, %rdx, %rdx +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f19(i128) -> i128 { block0(v0: i128): @@ -596,60 +481,52 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 48) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rax, %rcx -; Inst 4: shrq $1, %rcx -; Inst 5: movabsq $8608480567731124087, %rdi -; Inst 6: andq %rdi, %rcx -; Inst 7: subq %rcx, %rax -; Inst 8: shrq $1, %rcx -; Inst 9: andq %rdi, %rcx -; Inst 10: subq %rcx, %rax -; Inst 11: shrq $1, %rcx -; Inst 12: andq %rdi, %rcx -; Inst 13: subq %rcx, %rax -; Inst 14: movq %rax, %rdi -; Inst 15: shrq $4, %rdi -; Inst 16: addq %rax, %rdi -; Inst 17: movabsq $1085102592571150095, %rax -; Inst 18: andq %rax, %rdi -; Inst 19: movabsq $72340172838076673, %rax -; Inst 20: imulq %rax, %rdi -; Inst 21: shrq $56, %rdi -; Inst 22: movq %rsi, %rcx -; Inst 23: shrq $1, %rcx -; Inst 24: movabsq $8608480567731124087, %rax -; Inst 25: andq %rax, %rcx -; Inst 26: subq %rcx, %rsi -; Inst 27: shrq $1, %rcx -; Inst 28: andq %rax, %rcx -; Inst 29: subq %rcx, %rsi -; Inst 30: shrq $1, %rcx -; Inst 31: andq %rax, %rcx -; Inst 32: subq %rcx, %rsi -; Inst 33: movq %rsi, %rax -; Inst 34: shrq $4, %rax -; Inst 35: addq %rsi, %rax -; Inst 36: movabsq $1085102592571150095, %rsi -; Inst 37: andq %rsi, %rax -; Inst 38: movabsq $72340172838076673, %rsi -; Inst 39: imulq %rsi, %rax -; Inst 40: shrq $56, %rax -; Inst 41: addq %rax, %rdi -; Inst 42: xorq %rsi, %rsi -; Inst 43: movq %rdi, %rax -; Inst 44: movq %rsi, %rdx -; Inst 45: movq %rbp, %rsp -; Inst 46: popq %rbp -; Inst 47: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %r9 +; shrq $1, %r9, %r9 +; movabsq $8608480567731124087, %rax +; andq %r9, %rax, %r9 +; subq %rdi, %r9, %rdi +; shrq $1, %r9, %r9 +; andq %r9, %rax, %r9 +; subq %rdi, %r9, %rdi +; shrq $1, %r9, %r9 +; andq %r9, %rax, %r9 +; subq %rdi, %r9, %rdi +; movq %rdi, %rax +; shrq $4, %rax, %rax +; addq %rax, %rdi, %rax +; movabsq $1085102592571150095, %r9 +; andq %rax, %r9, %rax +; movabsq $72340172838076673, %rcx +; imulq %rax, %rcx, %rax +; shrq $56, %rax, %rax +; movq %rsi, %rdi +; shrq $1, %rdi, %rdi +; movabsq $8608480567731124087, %rcx +; 
andq %rdi, %rcx, %rdi +; subq %rsi, %rdi, %rsi +; shrq $1, %rdi, %rdi +; andq %rdi, %rcx, %rdi +; subq %rsi, %rdi, %rsi +; shrq $1, %rdi, %rdi +; andq %rdi, %rcx, %rdi +; subq %rsi, %rdi, %rsi +; movq %rsi, %rcx +; shrq $4, %rcx, %rcx +; addq %rcx, %rsi, %rcx +; movabsq $1085102592571150095, %rsi +; andq %rcx, %rsi, %rcx +; movabsq $72340172838076673, %rdx +; imulq %rcx, %rdx, %rcx +; shrq $56, %rcx, %rcx +; addq %rax, %rcx, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f20(i128) -> i128 { block0(v0: i128): @@ -657,102 +534,94 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 90) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rcx -; Inst 3: movabsq $6148914691236517205, %rsi -; Inst 4: movq %rcx, %rax -; Inst 5: andq %rsi, %rax -; Inst 6: shrq $1, %rcx -; Inst 7: andq %rsi, %rcx -; Inst 8: shlq $1, %rax -; Inst 9: orq %rcx, %rax -; Inst 10: movabsq $3689348814741910323, %rsi -; Inst 11: movq %rax, %rcx -; Inst 12: andq %rsi, %rcx -; Inst 13: shrq $2, %rax -; Inst 14: andq %rsi, %rax -; Inst 15: shlq $2, %rcx -; Inst 16: orq %rax, %rcx -; Inst 17: movabsq $1085102592571150095, %rsi -; Inst 18: movq %rcx, %rax -; Inst 19: andq %rsi, %rax -; Inst 20: shrq $4, %rcx -; Inst 21: andq %rsi, %rcx -; Inst 22: shlq $4, %rax -; Inst 23: orq %rcx, %rax -; Inst 24: movabsq $71777214294589695, %rsi -; Inst 25: movq %rax, %rcx -; Inst 26: andq %rsi, %rcx -; Inst 27: shrq $8, %rax -; Inst 28: andq %rsi, %rax -; Inst 29: shlq $8, %rcx -; Inst 30: orq %rax, %rcx -; Inst 31: movabsq $281470681808895, %rsi -; Inst 32: movq %rcx, %rax -; Inst 33: andq %rsi, %rax -; Inst 34: shrq $16, %rcx -; Inst 35: andq %rsi, %rcx -; Inst 36: shlq $16, %rax -; Inst 37: orq %rcx, %rax -; Inst 38: movabsq $4294967295, %rcx -; Inst 39: movq %rax, %rsi -; Inst 40: andq %rcx, %rsi -; Inst 41: shrq $32, %rax -; Inst 42: shlq $32, %rsi -; Inst 43: orq %rax, %rsi -; Inst 44: movabsq $6148914691236517205, %rax -; Inst 45: movq %rdi, %rcx -; Inst 46: andq %rax, %rcx -; Inst 47: shrq $1, %rdi -; Inst 48: andq %rax, %rdi -; Inst 49: shlq $1, %rcx -; Inst 50: orq %rdi, %rcx -; Inst 51: movabsq $3689348814741910323, %rdi -; Inst 52: movq %rcx, %rax -; Inst 53: andq %rdi, %rax -; Inst 54: shrq $2, %rcx -; Inst 55: andq %rdi, %rcx -; Inst 56: shlq $2, %rax -; Inst 57: orq %rcx, %rax -; Inst 58: movabsq $1085102592571150095, %rdi -; Inst 59: movq %rax, %rcx -; Inst 60: andq %rdi, %rcx -; Inst 61: shrq $4, %rax -; Inst 62: andq %rdi, %rax -; Inst 63: shlq $4, %rcx -; Inst 64: orq %rax, %rcx -; Inst 65: movabsq $71777214294589695, %rdi -; Inst 66: movq %rcx, %rax -; Inst 67: andq %rdi, %rax -; Inst 68: shrq $8, %rcx -; Inst 69: andq %rdi, %rcx -; Inst 70: shlq $8, %rax -; Inst 71: orq %rcx, %rax -; Inst 72: movabsq $281470681808895, %rdi -; Inst 73: movq %rax, %rcx -; Inst 74: andq %rdi, %rcx -; Inst 75: shrq $16, %rax -; Inst 76: andq %rdi, %rax -; Inst 77: shlq $16, %rcx -; Inst 78: orq %rax, %rcx -; Inst 79: movabsq $4294967295, %rax -; Inst 80: movq %rcx, %rdi -; Inst 81: andq %rax, %rdi -; Inst 82: shrq $32, %rcx -; Inst 83: shlq $32, %rdi -; Inst 84: orq %rcx, %rdi -; Inst 85: movq %rsi, %rax -; Inst 86: movq %rdi, %rdx -; Inst 87: movq %rbp, %rsp -; Inst 88: popq %rbp -; Inst 89: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movabsq $6148914691236517205, %r9 +; movq %rsi, %rax +; andq %rax, %r9, %rax +; shrq $1, %rsi, %rsi +; andq %rsi, %r9, %rsi +; shlq $1, %rax, %rax +; 
orq %rax, %rsi, %rax +; movabsq $3689348814741910323, %rcx +; movq %rax, %rdx +; andq %rdx, %rcx, %rdx +; shrq $2, %rax, %rax +; andq %rax, %rcx, %rax +; shlq $2, %rdx, %rdx +; orq %rdx, %rax, %rdx +; movabsq $1085102592571150095, %r9 +; movq %rdx, %rsi +; andq %rsi, %r9, %rsi +; shrq $4, %rdx, %rdx +; andq %rdx, %r9, %rdx +; shlq $4, %rsi, %rsi +; orq %rsi, %rdx, %rsi +; movabsq $71777214294589695, %rax +; movq %rsi, %rdx +; andq %rdx, %rax, %rdx +; shrq $8, %rsi, %rsi +; andq %rsi, %rax, %rsi +; shlq $8, %rdx, %rdx +; orq %rdx, %rsi, %rdx +; movabsq $281470681808895, %r9 +; movq %rdx, %r11 +; andq %r11, %r9, %r11 +; shrq $16, %rdx, %rdx +; andq %rdx, %r9, %rdx +; shlq $16, %r11, %r11 +; orq %r11, %rdx, %r11 +; movabsq $4294967295, %rcx +; movq %r11, %rax +; andq %rax, %rcx, %rax +; shrq $32, %r11, %r11 +; shlq $32, %rax, %rax +; orq %rax, %r11, %rax +; movabsq $6148914691236517205, %rdx +; movq %rdi, %r9 +; andq %r9, %rdx, %r9 +; shrq $1, %rdi, %rdi +; andq %rdi, %rdx, %rdi +; shlq $1, %r9, %r9 +; orq %r9, %rdi, %r9 +; movabsq $3689348814741910323, %rsi +; movq %r9, %rcx +; andq %rcx, %rsi, %rcx +; shrq $2, %r9, %r9 +; andq %r9, %rsi, %r9 +; shlq $2, %rcx, %rcx +; orq %rcx, %r9, %rcx +; movabsq $1085102592571150095, %rdx +; movq %rcx, %r9 +; andq %r9, %rdx, %r9 +; shrq $4, %rcx, %rcx +; andq %rcx, %rdx, %rcx +; shlq $4, %r9, %r9 +; orq %r9, %rcx, %r9 +; movabsq $71777214294589695, %rsi +; movq %r9, %rcx +; andq %rcx, %rsi, %rcx +; shrq $8, %r9, %r9 +; andq %r9, %rsi, %r9 +; shlq $8, %rcx, %rcx +; orq %rcx, %r9, %rcx +; movabsq $281470681808895, %rdx +; movq %rcx, %r8 +; andq %r8, %rdx, %r8 +; shrq $16, %rcx, %rcx +; andq %rcx, %rdx, %rcx +; shlq $16, %r8, %r8 +; orq %r8, %rcx, %r8 +; movabsq $4294967295, %rsi +; movq %r8, %rdx +; andq %rdx, %rsi, %rdx +; shrq $32, %r8, %r8 +; shlq $32, %rdx, %rdx +; orq %rdx, %r8, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f21(i128, i64) { block0(v0: i128, v1: i64): @@ -760,19 +629,14 @@ block0(v0: i128, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, 0(%rdx) -; Inst 3: movq %rsi, 8(%rdx) -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, 0(%rdx) +; movq %rsi, 8(%rdx) +; movq %rbp, %rsp +; popq %rbp +; ret function %f22(i64) -> i128 { block0(v0: i64): @@ -780,21 +644,14 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rdi), %rsi -; Inst 3: movq 8(%rdi), %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rdi), %rax +; movq 8(%rdi), %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f23(i128, b1) -> i128 { block0(v0: i128, v1: b1): @@ -813,46 +670,31 @@ block2(v6: i128): return v8 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dl -; Inst 3: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 
15) -; Inst 4: xorq %rdi, %rdi -; Inst 5: xorq %rsi, %rsi -; Inst 6: movl $1, %ecx -; Inst 7: xorq %rax, %rax -; Inst 8: addq %rcx, %rdi -; Inst 9: adcq %rax, %rsi -; Inst 10: movq %rdi, %rax -; Inst 11: movq %rsi, %rdx -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 15 .. 26) -; Inst 15: xorq %rdi, %rdi -; Inst 16: xorq %rsi, %rsi -; Inst 17: movl $2, %ecx -; Inst 18: xorq %rax, %rax -; Inst 19: addq %rcx, %rdi -; Inst 20: adcq %rax, %rsi -; Inst 21: movq %rdi, %rax -; Inst 22: movq %rsi, %rdx -; Inst 23: movq %rbp, %rsp -; Inst 24: popq %rbp -; Inst 25: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dl +; jnz label1; j label2 +; block1: +; xorq %rax, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movl $1, %r8d +; xorq %r9, %r9, %r9 +; addq %rax, %r8, %rax +; adcq %rdx, %r9, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; xorq %rax, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movl $2, %edi +; xorq %rcx, %rcx, %rcx +; addq %rax, %rdi, %rax +; adcq %rdx, %rcx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f24(i128, i128, i64, i128, i128, i128) -> i128 { @@ -866,43 +708,38 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128): return v11 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 31) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r9, %r11 -; Inst 6: movq 16(%rbp), %r13 -; Inst 7: movq 24(%rbp), %r12 -; Inst 8: movq 32(%rbp), %r10 -; Inst 9: movq 40(%rbp), %r9 -; Inst 10: movq 48(%rbp), %rax -; Inst 11: addq %rdx, %rdi -; Inst 12: movq %rsi, %rdx -; Inst 13: adcq %rcx, %rdx -; Inst 14: xorq %rsi, %rsi -; Inst 15: addq %r8, %r11 -; Inst 16: adcq %rsi, %r13 -; Inst 17: addq %r9, %r12 -; Inst 18: adcq %rax, %r10 -; Inst 19: addq %r11, %rdi -; Inst 20: adcq %r13, %rdx -; Inst 21: addq %rdi, %r12 -; Inst 22: adcq %rdx, %r10 -; Inst 23: movq %r12, %rax -; Inst 24: movq %r10, %rdx -; Inst 25: movq 0(%rsp), %r12 -; Inst 26: movq 8(%rsp), %r13 -; Inst 27: addq $16, %rsp -; Inst 28: movq %rbp, %rsp -; Inst 29: popq %rbp -; Inst 30: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $64, %rsp +; movq %rbx, 32(%rsp) +; movq %r13, 40(%rsp) +; movq %r15, 48(%rsp) +; block0: +; movq %rdx, rsp(0 + virtual offset) +; movq 16(%rbp), %r11 +; movq 24(%rbp), %rax +; movq 32(%rbp), %rdx +; movq 40(%rbp), %r15 +; movq 48(%rbp), %rbx +; movq rsp(0 + virtual offset), %r13 +; addq %rdi, %r13, %rdi +; adcq %rsi, %rcx, %rsi +; xorq %rcx, %rcx, %rcx +; addq %r9, %r8, %r9 +; adcq %r11, %rcx, %r11 +; addq %rax, %r15, %rax +; adcq %rdx, %rbx, %rdx +; addq %rdi, %r9, %rdi +; adcq %rsi, %r11, %rsi +; addq %rax, %rdi, %rax +; adcq %rdx, %rsi, %rdx +; movq 32(%rsp), %rbx +; movq 40(%rsp), %r13 +; movq 48(%rsp), %r15 +; addq %rsp, $64, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f25(i128) -> i128, i128, i128, i64, i128, i128 { block0(v0: i128): @@ -910,49 +747,41 @@ block0(v0: i128): return v0, v0, v0, v1, v0, v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
37) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $32, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r14, 16(%rsp) -; Inst 6: movq %rbx, 24(%rsp) -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rdx, %rsi -; Inst 9: movq %rdi, %r12 -; Inst 10: movq %rdi, %rcx -; Inst 11: movq %rax, %rdx -; Inst 12: movq %rdi, %r8 -; Inst 13: movq %rax, %r9 -; Inst 14: movq %rdi, %r10 -; Inst 15: movq %rax, %r11 -; Inst 16: movq %rdi, %r13 -; Inst 17: movq %rax, %r14 -; Inst 18: movq %rax, %rbx -; Inst 19: movq %rcx, %rax -; Inst 20: movq %r8, 0(%rsi) -; Inst 21: movq %r9, 8(%rsi) -; Inst 22: movq %r10, 16(%rsi) -; Inst 23: movq %r11, 24(%rsi) -; Inst 24: movq %r12, 32(%rsi) -; Inst 25: movq %r13, 40(%rsi) -; Inst 26: movq %r14, 48(%rsi) -; Inst 27: movq %rdi, 56(%rsi) -; Inst 28: movq %rbx, 64(%rsi) -; Inst 29: movq 0(%rsp), %r12 -; Inst 30: movq 8(%rsp), %r13 -; Inst 31: movq 16(%rsp), %r14 -; Inst 32: movq 24(%rsp), %rbx -; Inst 33: addq $32, %rsp -; Inst 34: movq %rbp, %rsp -; Inst 35: popq %rbp -; Inst 36: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $32, %rsp +; movq %rbx, 0(%rsp) +; movq %r13, 8(%rsp) +; movq %r14, 16(%rsp) +; movq %r15, 24(%rsp) +; block0: +; movq %rdx, %r14 +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rdi, %rbx +; movq %rsi, %r13 +; movq %rdi, %r15 +; movq %rsi, %r11 +; movq %rdi, %rcx +; movq %rdi, %r8 +; movq %rsi, %r9 +; movq %rbx, 0(%r14) +; movq %r13, 8(%r14) +; movq %r15, 16(%r14) +; movq %r11, 24(%r14) +; movq %rcx, 32(%r14) +; movq %r8, 40(%r14) +; movq %r9, 48(%r14) +; movq %rdi, 56(%r14) +; movq %rsi, 64(%r14) +; movq 0(%rsp), %rbx +; movq 8(%rsp), %r13 +; movq 16(%rsp), %r14 +; movq 24(%rsp), %r15 +; addq %rsp, $32, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f26(i128, i128) -> i128, i128 { fn0 = %g(i128, i128) -> i128, i128 @@ -961,33 +790,29 @@ block0(v0: i128, v1: i128): return v2, v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 21) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r8, %r12 -; Inst 5: subq $16, %rsp -; Inst 6: virtual_sp_offset_adjust 16 -; Inst 7: lea 0(%rsp), %r8 -; Inst 8: load_ext_name %g+0, %rax -; Inst 9: call *%rax -; Inst 10: movq 0(%rsp), %rsi -; Inst 11: movq 8(%rsp), %rdi -; Inst 12: addq $16, %rsp -; Inst 13: virtual_sp_offset_adjust -16 -; Inst 14: movq %rsi, 0(%r12) -; Inst 15: movq %rdi, 8(%r12) -; Inst 16: movq 0(%rsp), %r12 -; Inst 17: addq $16, %rsp -; Inst 18: movq %rbp, %rsp -; Inst 19: popq %rbp -; Inst 20: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r12, 0(%rsp) +; block0: +; movq %r8, %r12 +; subq %rsp, $16, %rsp +; virtual_sp_offset_adjust 16 +; lea 0(%rsp), %r8 +; load_ext_name %g+0, %r9 +; call *%r9 +; movq 0(%rsp), %r11 +; movq 8(%rsp), %rdi +; addq %rsp, $16, %rsp +; virtual_sp_offset_adjust -16 +; movq %r12, %r8 +; movq %r11, 0(%r8) +; movq %rdi, 8(%r8) +; movq 0(%rsp), %r12 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f27(i128) -> i128 { block0(v0: i128): @@ -995,33 +820,26 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
21) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movabsq $-1, %rcx -; Inst 3: bsrq %rsi, %rax -; Inst 4: cmovzq %rcx, %rax -; Inst 5: movl $63, %esi -; Inst 6: subq %rax, %rsi -; Inst 7: movabsq $-1, %rax -; Inst 8: bsrq %rdi, %rcx -; Inst 9: cmovzq %rax, %rcx -; Inst 10: movl $63, %edi -; Inst 11: subq %rcx, %rdi -; Inst 12: addq $64, %rdi -; Inst 13: cmpq $64, %rsi -; Inst 14: cmovnzq %rsi, %rdi -; Inst 15: xorq %rsi, %rsi -; Inst 16: movq %rdi, %rax -; Inst 17: movq %rsi, %rdx -; Inst 18: movq %rbp, %rsp -; Inst 19: popq %rbp -; Inst 20: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movabsq $-1, %r9 +; bsrq %rsi, %rax +; cmovzq %r9, %rax, %rax +; movl $63, %r8d +; subq %r8, %rax, %r8 +; movabsq $-1, %rsi +; bsrq %rdi, %rcx +; cmovzq %rsi, %rcx, %rcx +; movl $63, %eax +; subq %rax, %rcx, %rax +; addq %rax, $64, %rax +; cmpq $64, %r8 +; cmovnzq %r8, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f28(i128) -> i128 { block0(v0: i128): @@ -1029,30 +847,22 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 18) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movl $64, %ecx -; Inst 4: bsfq %rdi, %rsi -; Inst 5: cmovzq %rcx, %rsi -; Inst 6: movl $64, %ecx -; Inst 7: bsfq %rax, %rdi -; Inst 8: cmovzq %rcx, %rdi -; Inst 9: addq $64, %rdi -; Inst 10: cmpq $64, %rsi -; Inst 11: cmovzq %rdi, %rsi -; Inst 12: xorq %rdi, %rdi -; Inst 13: movq %rsi, %rax -; Inst 14: movq %rdi, %rdx -; Inst 15: movq %rbp, %rsp -; Inst 16: popq %rbp -; Inst 17: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $64, %r9d +; bsfq %rdi, %rax +; cmovzq %r9, %rax, %rax +; movl $64, %edx +; bsfq %rsi, %rsi +; cmovzq %rdx, %rsi, %rsi +; addq %rsi, $64, %rsi +; cmpq $64, %rax +; cmovzq %rsi, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f29(i8, i128) -> i8 { block0(v0: i8, v1: i128): @@ -1060,20 +870,16 @@ block0(v0: i8, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rcx -; Inst 3: shlb %cl, %dil -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %r9 +; movq %r9, %rcx +; shlb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f30(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1081,36 +887,27 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
24) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdi -; Inst 4: movq %rax, %rsi -; Inst 5: movq %rdx, %rcx -; Inst 6: shlq %cl, %rsi -; Inst 7: movq %rdx, %rcx -; Inst 8: shlq %cl, %rdi -; Inst 9: movl $64, %ecx -; Inst 10: subq %rdx, %rcx -; Inst 11: shrq %cl, %rax -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %rax -; Inst 15: orq %rdi, %rax -; Inst 16: testq $64, %rdx -; Inst 17: cmovzq %rsi, %rcx -; Inst 18: cmovzq %rax, %rsi -; Inst 19: movq %rcx, %rax -; Inst 20: movq %rsi, %rdx -; Inst 21: movq %rbp, %rsp -; Inst 22: popq %rbp -; Inst 23: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r8 +; movq %r8, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; shlq %cl, %rsi, %rsi +; movl $64, %ecx +; subq %rcx, %r8, %rcx +; shrq %cl, %rdi, %rdi +; xorq %rax, %rax, %rax +; testq $127, %r8 +; cmovzq %rax, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r8 +; cmovzq %rdx, %rax, %rax +; cmovzq %rdi, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f31(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1118,37 +915,29 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 25) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shrq %cl, %rdi -; Inst 5: movq %rax, %rsi -; Inst 6: movq %rdx, %rcx -; Inst 7: shrq %cl, %rsi -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: shlq %cl, %rax -; Inst 11: xorq %rcx, %rcx -; Inst 12: testq $127, %rdx -; Inst 13: cmovzq %rcx, %rax -; Inst 14: orq %rdi, %rax -; Inst 15: xorq %rcx, %rcx -; Inst 16: testq $64, %rdx -; Inst 17: movq %rsi, %rdi -; Inst 18: cmovzq %rax, %rdi -; Inst 19: cmovzq %rsi, %rcx -; Inst 20: movq %rdi, %rax -; Inst 21: movq %rcx, %rdx -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r9 +; movq %r9, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movl $64, %ecx +; subq %rcx, %r9, %rcx +; shlq %cl, %rsi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $127, %r9 +; cmovzq %rdx, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $64, %r9 +; movq %r8, %rax +; cmovzq %rsi, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f32(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1156,38 +945,32 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
26) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shrq %cl, %rax -; Inst 5: movq %rsi, %rdi -; Inst 6: movq %rdx, %rcx -; Inst 7: sarq %cl, %rdi -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rsi, %r8 -; Inst 11: shlq %cl, %r8 -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %r8 -; Inst 15: orq %r8, %rax -; Inst 16: sarq $63, %rsi -; Inst 17: testq $64, %rdx -; Inst 18: movq %rdi, %rcx -; Inst 19: cmovzq %rax, %rcx -; Inst 20: cmovzq %rdi, %rsi -; Inst 21: movq %rcx, %rax -; Inst 22: movq %rsi, %rdx -; Inst 23: movq %rbp, %rsp -; Inst 24: popq %rbp -; Inst 25: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r8 +; movq %r8, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %rdx +; sarq %cl, %rdx, %rdx +; movl $64, %ecx +; movq %r8, %r11 +; subq %rcx, %r11, %rcx +; movq %rsi, %r8 +; shlq %cl, %r8, %r8 +; xorq %r9, %r9, %r9 +; testq $127, %r11 +; cmovzq %r9, %r8, %r8 +; orq %rdi, %r8, %rdi +; sarq $63, %rsi, %rsi +; testq $64, %r11 +; movq %rdx, %rax +; cmovzq %rdi, %rax, %rax +; cmovzq %rdx, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f33(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1195,60 +978,50 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 48) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shlq %cl, %rax -; Inst 5: movq %rsi, %r8 -; Inst 6: movq %rdx, %rcx -; Inst 7: shlq %cl, %r8 -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rdi, %r9 -; Inst 11: shrq %cl, %r9 -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %r9 -; Inst 15: orq %r8, %r9 -; Inst 16: testq $64, %rdx -; Inst 17: movq %rcx, %r8 -; Inst 18: cmovzq %rax, %r8 -; Inst 19: cmovzq %r9, %rax -; Inst 20: movl $128, %r9d -; Inst 21: subq %rdx, %r9 -; Inst 22: movq %rdi, %rdx -; Inst 23: movq %r9, %rcx -; Inst 24: shrq %cl, %rdx -; Inst 25: movq %rsi, %rdi -; Inst 26: movq %r9, %rcx -; Inst 27: shrq %cl, %rdi -; Inst 28: movl $64, %ecx -; Inst 29: subq %r9, %rcx -; Inst 30: shlq %cl, %rsi -; Inst 31: xorq %rcx, %rcx -; Inst 32: testq $127, %r9 -; Inst 33: cmovzq %rcx, %rsi -; Inst 34: orq %rdx, %rsi -; Inst 35: xorq %rdx, %rdx -; Inst 36: testq $64, %r9 -; Inst 37: movq %rdi, %rcx -; Inst 38: cmovzq %rsi, %rcx -; Inst 39: movq %rdx, %rsi -; Inst 40: cmovzq %rdi, %rsi -; Inst 41: orq %rcx, %r8 -; Inst 42: orq %rsi, %rax -; Inst 43: movq %rax, %rdx -; Inst 44: movq %r8, %rax -; Inst 45: movq %rbp, %rsp -; Inst 46: popq %rbp -; Inst 47: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r11 +; movq %r11, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; movq %rsi, %r9 +; shlq %cl, %r9, %r9 +; movl $64, %ecx +; subq %rcx, %r11, %rcx +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 +; xorq %rax, %rax, %rax +; testq $127, %r11 +; cmovzq %rax, %r8, %r8 +; orq %r8, %r9, %r8 +; testq $64, %r11 +; cmovzq %rdx, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movl $128, %ecx +; movq %r11, %r8 +; subq %rcx, %r8, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r9 +; shrq %cl, %r9, %r9 +; movq %rcx, %r8 +; movl $64, %ecx +; subq %rcx, %r8, %rcx +; shlq %cl, %rsi, %rsi +; xorq %r11, %r11, %r11 +; testq $127, %r8 +; cmovzq %r11, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %r11, %r11, %r11 +; testq $64, %r8 +; movq %r9, %rdi +; cmovzq %rsi, 
%rdi, %rdi +; cmovzq %r9, %r11, %r11 +; orq %rax, %rdi, %rax +; orq %rdx, %r11, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f34(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1256,55 +1029,49 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 45) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shrq %cl, %rax -; Inst 5: movq %rsi, %r9 -; Inst 6: movq %rdx, %rcx -; Inst 7: shrq %cl, %r9 -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rsi, %r8 -; Inst 11: shlq %cl, %r8 -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %r8 -; Inst 15: orq %rax, %r8 -; Inst 16: xorq %rcx, %rcx -; Inst 17: testq $64, %rdx -; Inst 18: movq %r9, %rax -; Inst 19: cmovzq %r8, %rax -; Inst 20: movq %rcx, %r8 -; Inst 21: cmovzq %r9, %r8 -; Inst 22: movl $128, %r9d -; Inst 23: subq %rdx, %r9 -; Inst 24: movq %rdi, %rdx -; Inst 25: movq %r9, %rcx -; Inst 26: shlq %cl, %rdx -; Inst 27: movq %r9, %rcx -; Inst 28: shlq %cl, %rsi -; Inst 29: movl $64, %ecx -; Inst 30: subq %r9, %rcx -; Inst 31: shrq %cl, %rdi -; Inst 32: xorq %rcx, %rcx -; Inst 33: testq $127, %r9 -; Inst 34: cmovzq %rcx, %rdi -; Inst 35: orq %rsi, %rdi -; Inst 36: testq $64, %r9 -; Inst 37: cmovzq %rdx, %rcx -; Inst 38: cmovzq %rdi, %rdx -; Inst 39: orq %rcx, %rax -; Inst 40: orq %rdx, %r8 -; Inst 41: movq %r8, %rdx -; Inst 42: movq %rbp, %rsp -; Inst 43: popq %rbp -; Inst 44: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r11 +; movq %r11, %rcx +; movq %rdi, %rax +; shrq %cl, %rax, %rax +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movl $64, %ecx +; subq %rcx, %r11, %rcx +; movq %rsi, %r9 +; shlq %cl, %r9, %r9 +; xorq %rdx, %rdx, %rdx +; testq $127, %r11 +; cmovzq %rdx, %r9, %r9 +; orq %r9, %rax, %r9 +; xorq %rdx, %rdx, %rdx +; testq $64, %r11 +; movq %r8, %rax +; cmovzq %r9, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movl $128, %ecx +; movq %r11, %r8 +; subq %rcx, %r8, %rcx +; movq %rdi, %r11 +; shlq %cl, %r11, %r11 +; shlq %cl, %rsi, %rsi +; movq %rcx, %r8 +; movl $64, %ecx +; movq %r8, %r9 +; subq %rcx, %r9, %rcx +; shrq %cl, %rdi, %rdi +; xorq %r8, %r8, %r8 +; testq $127, %r9 +; cmovzq %r8, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r9 +; cmovzq %r11, %r8, %r8 +; cmovzq %rdi, %r11, %r11 +; orq %rax, %r8, %rax +; orq %rdx, %r11, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif b/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif index 0a1c8ac7c6..b3c82b4460 100644 --- a/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif +++ b/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif @@ -16,30 +16,19 @@ block2: return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq %rsp, %rdi -; Inst 3: jnbe label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: xorl %eax, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 
12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpq %rsp, %rdi +; jnbe label1; j label2 +; block1: +; xorl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index 85c26dec3e..d2fa2b9f82 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -5,7 +5,7 @@ function %add_from_mem_u32_1(i64, i32) -> i32 { block0(v0: i64, v1: i32): v2 = load.i32 v0 v3 = iadd.i32 v2, v1 - ; check: addl 0(%rdi), %esi + ; check: addl %esi, 0(%rdi), %esi return v3 } @@ -13,7 +13,7 @@ function %add_from_mem_u32_2(i64, i32) -> i32 { block0(v0: i64, v1: i32): v2 = load.i32 v0 v3 = iadd.i32 v1, v2 - ; check: addl 0(%rdi), %esi + ; check: addl %esi, 0(%rdi), %esi return v3 } @@ -21,7 +21,7 @@ function %add_from_mem_u64_1(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = load.i64 v0 v3 = iadd.i64 v2, v1 - ; check: addq 0(%rdi), %rsi + ; check: addq %rsi, 0(%rdi), %rsi return v3 } @@ -29,7 +29,7 @@ function %add_from_mem_u64_2(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = load.i64 v0 v3 = iadd.i64 v1, v2 - ; check: addq 0(%rdi), %rsi + ; check: addq %rsi, 0(%rdi), %rsi return v3 } @@ -39,8 +39,8 @@ function %add_from_mem_not_narrow(i64, i8) -> i8 { block0(v0: i64, v1: i8): v2 = load.i8 v0 v3 = iadd.i8 v2, v1 - ; check: movzbq 0(%rdi), %rdi - ; nextln: addl %esi, %edi + ; check: movzbq 0(%rdi), %rax + ; nextln: addl %eax, %esi, %eax return v3 } @@ -51,12 +51,11 @@ block0(v0: i64, v1: i64): store.i64 v3, v1 v4 = load.i64 v3 return v4 - ; check: movq 0(%rdi), %rax - ; nextln: movq %rax, %rcx - ; nextln: addq %rdi, %rcx - ; nextln: movq %rcx, 0(%rsi) - ; nextln: movq 0(%rax,%rdi,1), %rsi - ; nextln: movq %rsi, %rax + ; check: movq 0(%rdi), %r11 + ; nextln: movq %r11, %rax + ; nextln: addq %rax, %rdi, %rax + ; nextln: movq %rax, 0(%rsi) + ; nextln: movq 0(%r11,%rdi,1), %rax } function %merge_scalar_to_vector(i64) -> i32x4 { diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif index 45e631390c..af16a95c83 100644 --- a/cranelift/filetests/filetests/isa/x64/move-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif @@ -13,15 +13,10 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rbp, %rsp -; Inst 3: popq %rbp -; Inst 4: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif index e4e7c6b828..09309733a9 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif @@ -7,19 +7,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: popcntq %rdi, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; popcntq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt(i32) -> i32 { block0(v0: i32): @@ -27,17 +21,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: popcntl %edi, %esi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; popcntl %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index 549fa9c4bf..94582c1d12 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -7,37 +7,31 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 25) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: shrq $1, %rax -; Inst 4: movabsq $8608480567731124087, %rsi -; Inst 5: andq %rsi, %rax -; Inst 6: subq %rax, %rdi -; Inst 7: shrq $1, %rax -; Inst 8: andq %rsi, %rax -; Inst 9: subq %rax, %rdi -; Inst 10: shrq $1, %rax -; Inst 11: andq %rsi, %rax -; Inst 12: subq %rax, %rdi -; Inst 13: movq %rdi, %rsi -; Inst 14: shrq $4, %rsi -; Inst 15: addq %rdi, %rsi -; Inst 16: movabsq $1085102592571150095, %rdi -; Inst 17: andq %rdi, %rsi -; Inst 18: movabsq $72340172838076673, %rdi -; Inst 19: imulq %rdi, %rsi -; Inst 20: shrq $56, %rsi -; Inst 21: movq %rsi, %rax -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; shrq $1, %rcx, %rcx +; movabsq $8608480567731124087, %r8 +; andq %rcx, %r8, %rcx +; subq %rdi, %rcx, %rdi +; shrq $1, %rcx, %rcx +; andq %rcx, %r8, %rcx +; subq %rdi, %rcx, %rdi +; shrq $1, %rcx, %rcx +; andq %rcx, %r8, %rcx +; subq %rdi, %rcx, %rdi +; movq %rdi, %rax +; shrq $4, %rax, %rax +; addq %rax, %rdi, %rax +; movabsq $1085102592571150095, %rcx +; andq %rax, %rcx, %rax +; movabsq $72340172838076673, %r11 +; imulq %rax, %r11, %rax +; shrq $56, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt64load(i64) -> i64 { block0(v0: i64): @@ -46,38 +40,32 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
26) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rdi), %rdi -; Inst 3: movq %rdi, %rax -; Inst 4: shrq $1, %rax -; Inst 5: movabsq $8608480567731124087, %rsi -; Inst 6: andq %rsi, %rax -; Inst 7: subq %rax, %rdi -; Inst 8: shrq $1, %rax -; Inst 9: andq %rsi, %rax -; Inst 10: subq %rax, %rdi -; Inst 11: shrq $1, %rax -; Inst 12: andq %rsi, %rax -; Inst 13: subq %rax, %rdi -; Inst 14: movq %rdi, %rsi -; Inst 15: shrq $4, %rsi -; Inst 16: addq %rdi, %rsi -; Inst 17: movabsq $1085102592571150095, %rdi -; Inst 18: andq %rdi, %rsi -; Inst 19: movabsq $72340172838076673, %rdi -; Inst 20: imulq %rdi, %rsi -; Inst 21: shrq $56, %rsi -; Inst 22: movq %rsi, %rax -; Inst 23: movq %rbp, %rsp -; Inst 24: popq %rbp -; Inst 25: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rdi), %rdx +; movq %rdx, %r8 +; shrq $1, %r8, %r8 +; movabsq $8608480567731124087, %r9 +; andq %r8, %r9, %r8 +; subq %rdx, %r8, %rdx +; shrq $1, %r8, %r8 +; andq %r8, %r9, %r8 +; subq %rdx, %r8, %rdx +; shrq $1, %r8, %r8 +; andq %r8, %r9, %r8 +; subq %rdx, %r8, %rdx +; movq %rdx, %rax +; shrq $4, %rax, %rax +; addq %rax, %rdx, %rax +; movabsq $1085102592571150095, %rdx +; andq %rax, %rdx, %rax +; movabsq $72340172838076673, %rsi +; imulq %rax, %rsi, %rax +; shrq $56, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt32(i32) -> i32 { block0(v0: i32): @@ -85,35 +73,29 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 23) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: shrl $1, %eax -; Inst 4: movl $2004318071, %esi -; Inst 5: andl %esi, %eax -; Inst 6: subl %eax, %edi -; Inst 7: shrl $1, %eax -; Inst 8: andl %esi, %eax -; Inst 9: subl %eax, %edi -; Inst 10: shrl $1, %eax -; Inst 11: andl %esi, %eax -; Inst 12: subl %eax, %edi -; Inst 13: movq %rdi, %rsi -; Inst 14: shrl $4, %esi -; Inst 15: addl %edi, %esi -; Inst 16: andl $252645135, %esi -; Inst 17: imull $16843009, %esi -; Inst 18: shrl $24, %esi -; Inst 19: movq %rsi, %rax -; Inst 20: movq %rbp, %rsp -; Inst 21: popq %rbp -; Inst 22: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; shrl $1, %ecx, %ecx +; movl $2004318071, %r8d +; andl %ecx, %r8d, %ecx +; subl %edi, %ecx, %edi +; shrl $1, %ecx, %ecx +; andl %ecx, %r8d, %ecx +; subl %edi, %ecx, %edi +; shrl $1, %ecx, %ecx +; andl %ecx, %r8d, %ecx +; subl %edi, %ecx, %edi +; movq %rdi, %rax +; shrl $4, %eax, %eax +; addl %eax, %edi, %eax +; andl %eax, $252645135, %eax +; imull %eax, $16843009, %eax +; shrl $24, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt32load(i64) -> i32 { block0(v0: i64): @@ -122,34 +104,28 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
24) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl 0(%rdi), %edi -; Inst 3: movq %rdi, %rax -; Inst 4: shrl $1, %eax -; Inst 5: movl $2004318071, %esi -; Inst 6: andl %esi, %eax -; Inst 7: subl %eax, %edi -; Inst 8: shrl $1, %eax -; Inst 9: andl %esi, %eax -; Inst 10: subl %eax, %edi -; Inst 11: shrl $1, %eax -; Inst 12: andl %esi, %eax -; Inst 13: subl %eax, %edi -; Inst 14: movq %rdi, %rsi -; Inst 15: shrl $4, %esi -; Inst 16: addl %edi, %esi -; Inst 17: andl $252645135, %esi -; Inst 18: imull $16843009, %esi -; Inst 19: shrl $24, %esi -; Inst 20: movq %rsi, %rax -; Inst 21: movq %rbp, %rsp -; Inst 22: popq %rbp -; Inst 23: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl 0(%rdi), %edx +; movq %rdx, %r8 +; shrl $1, %r8d, %r8d +; movl $2004318071, %r9d +; andl %r8d, %r9d, %r8d +; subl %edx, %r8d, %edx +; shrl $1, %r8d, %r8d +; andl %r8d, %r9d, %r8d +; subl %edx, %r8d, %edx +; shrl $1, %r8d, %r8d +; andl %r8d, %r9d, %r8d +; subl %edx, %r8d, %edx +; movq %rdx, %rax +; shrl $4, %eax, %eax +; addl %eax, %edx, %eax +; andl %eax, $252645135, %eax +; imull %eax, $16843009, %eax +; shrl $24, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif index 586152b192..d00509e318 100644 --- a/cranelift/filetests/filetests/isa/x64/probestack.clif +++ b/cranelift/filetests/filetests/isa/x64/probestack.clif @@ -10,21 +10,15 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $100000, %eax -; Inst 3: call LibCall(Probestack) -; Inst 4: subq $100000, %rsp -; Inst 5: lea rsp(0 + virtual offset), %rsi -; Inst 6: movq %rsi, %rax -; Inst 7: addq $100000, %rsp -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; movl $100000, %eax +; call LibCall(Probestack) +; subq %rsp, $100000, %rsp +; block0: +; lea rsp(0 + virtual offset), %rax +; addq %rsp, $100000, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index c6b80bd345..5317a22872 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -10,25 +10,19 @@ block0(v0: i32, v1: i128, v2: i128): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $42, %eax -; Inst 3: cmpl %eax, %edi -; Inst 4: cmovzq %rsi, %rcx -; Inst 5: cmovzq %rdx, %r8 -; Inst 6: movq %rcx, %rax -; Inst 7: movq %r8, %rdx -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $42, %r9d +; cmpl %r9d, %edi +; cmovzq %rsi, %rcx, %rcx +; cmovzq %rdx, %r8, %r8 +; movq %rcx, %rax +; movq %r8, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret -;; Test for issue: https://github.com/bytecodealliance/wasmtime/issues/3963. function %f1(f32, i128, i128) -> i128 { block0(v0: f32, v1: i128, v2: i128): v3 = fcmp.f32 eq v0, v0 @@ -36,22 +30,17 @@ block0(v0: f32, v1: i128, v2: i128): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: ucomiss %xmm0, %xmm0 -; Inst 3: cmovnzq %rdx, %rdi -; Inst 4: cmovpq %rdx, %rdi -; Inst 5: cmovnzq %rcx, %rsi -; Inst 6: cmovpq %rcx, %rsi -; Inst 7: movq %rdi, %rax -; Inst 8: movq %rsi, %rdx -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ucomiss %xmm0, %xmm0 +; cmovnzq %rdx, %rdi, %rdi +; cmovpq %rdx, %rdi, %rdi +; cmovnzq %rcx, %rsi, %rsi +; cmovpq %rcx, %rsi, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index b59f8ddf45..7112afa311 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -8,18 +8,13 @@ block0(v0: f32x4, v1: f32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: andps %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %band_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -27,18 +22,13 @@ block0(v0: f64x2, v1: f64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: andpd %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andpd %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %band_i32x4(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): @@ -46,18 +36,13 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pand %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pand %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bor_f32x4(f32x4, f32x4) -> f32x4 { block0(v0: f32x4, v1: f32x4): @@ -65,18 +50,13 @@ block0(v0: f32x4, v1: f32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: orps %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; orps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bor_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -84,18 +64,13 @@ block0(v0: f64x2, v1: f64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: orpd %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; orpd %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bor_i32x4(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): @@ -103,18 +78,13 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: por %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; por %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bxor_f32x4(f32x4, f32x4) -> f32x4 { block0(v0: f32x4, v1: f32x4): @@ -122,18 +92,13 @@ block0(v0: f32x4, v1: f32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: xorps %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bxor_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -141,18 +106,13 @@ block0(v0: f64x2, v1: f64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: xorpd %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorpd %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bxor_i32x4(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): @@ -160,18 +120,13 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pxor %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pxor %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bitselect_i16x8() -> i16x8 { block0: @@ -182,23 +137,18 @@ block0: return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(0), %xmm0 -; Inst 3: load_const VCodeConstant(0), %xmm1 -; Inst 4: load_const VCodeConstant(0), %xmm2 -; Inst 5: pand %xmm0, %xmm1 -; Inst 6: pandn %xmm2, %xmm0 -; Inst 7: por %xmm1, %xmm0 -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm0 +; load_const VCodeConstant(0), %xmm5 +; load_const VCodeConstant(0), %xmm4 +; pand %xmm5, %xmm5, %xmm0 +; pandn %xmm0, %xmm0, %xmm4 +; por %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 { block0(v0: b16x8, v1: i16x8, v2: i16x8): @@ -206,19 +156,14 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pblendvb %xmm1, %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pblendvb %xmm2, %xmm2, %xmm1 +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 { block0(v0: b32x4, v1: f32x4, v2: f32x4): @@ -226,19 +171,14 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: blendvps %xmm1, %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; blendvps %xmm2, %xmm2, %xmm1 +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 { block0(v0: b64x2, v1: f64x2, v2: f64x2): @@ -246,19 +186,14 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: blendvpd %xmm1, %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; blendvpd %xmm2, %xmm2, %xmm1 +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %ishl_i8x16(i32) -> i8x16 { block0(v0: i32): @@ -267,24 +202,19 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm0 -; Inst 3: movd %edi, %xmm1 -; Inst 4: psllw %xmm1, %xmm0 -; Inst 5: lea const(VCodeConstant(0)), %rsi -; Inst 6: shlq $4, %rdi -; Inst 7: movdqu 0(%rsi,%rdi,1), %xmm1 -; Inst 8: pand %xmm1, %xmm0 -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; movd %edi, %xmm5 +; psllw %xmm0, %xmm5, %xmm0 +; lea const(VCodeConstant(0)), %rax +; shlq $4, %rdi, %rdi +; movdqu 0(%rax,%rdi,1), %xmm13 +; pand %xmm0, %xmm0, %xmm13 +; movq %rbp, %rsp +; popq %rbp +; ret function %ushr_i8x16_imm() -> i8x16 { block0: @@ -294,21 +224,16 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm0 -; Inst 3: psrlw $1, %xmm0 -; Inst 4: movdqu const(VCodeConstant(0)), %xmm1 -; Inst 5: pand %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; psrlw %xmm0, $1, %xmm0 +; movdqu const(VCodeConstant(0)), %xmm5 +; pand %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %sshr_i8x16(i32) -> i8x16 { block0(v0: i32): @@ -317,27 +242,21 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(0), %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: punpcklbw %xmm2, %xmm0 -; Inst 5: movdqa %xmm2, %xmm1 -; Inst 6: punpckhbw %xmm2, %xmm1 -; Inst 7: addl $8, %edi -; Inst 8: movd %edi, %xmm2 -; Inst 9: psraw %xmm2, %xmm0 -; Inst 10: psraw %xmm2, %xmm1 -; Inst 11: packsswb %xmm1, %xmm0 -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm9 +; movdqa %xmm9, %xmm0 +; punpcklbw %xmm0, %xmm0, %xmm9 +; punpckhbw %xmm9, %xmm9, %xmm9 +; addl %edi, $8, %edi +; movd %edi, %xmm11 +; psraw %xmm0, %xmm11, %xmm0 +; psraw %xmm9, %xmm11, %xmm9 +; packsswb %xmm0, %xmm0, %xmm9 +; movq %rbp, %rsp +; popq %rbp +; ret function %sshr_i8x16_imm(i8x16, i32) -> i8x16 { block0(v0: i8x16, v1: i32): @@ -345,25 +264,23 @@ block0(v0: i8x16, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm0, %xmm1 -; Inst 3: punpcklbw %xmm0, %xmm1 -; Inst 4: movdqa %xmm0, %xmm2 -; Inst 5: punpckhbw %xmm0, %xmm2 -; Inst 6: psraw $11, %xmm1 -; Inst 7: psraw $11, %xmm2 -; Inst 8: packsswb %xmm2, %xmm1 -; Inst 9: movdqa %xmm1, %xmm0 -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm9 +; punpcklbw %xmm9, %xmm9, %xmm0 +; punpckhbw %xmm0, %xmm0, %xmm0 +; movdqa %xmm9, %xmm12 +; psraw %xmm12, $11, %xmm12 +; movdqa %xmm12, %xmm9 +; psraw %xmm0, $11, %xmm0 +; movdqa %xmm9, %xmm1 +; packsswb %xmm1, %xmm1, %xmm0 +; movdqa %xmm1, %xmm9 +; movdqa %xmm9, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %sshr_i64x2(i64x2, i32) -> i64x2 { block0(v0: i64x2, v1: i32): @@ -371,24 +288,19 @@ block0(v0: i64x2, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 14) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pextrd.w $0, %xmm0, %rsi -; Inst 3: pextrd.w $1, %xmm0, %rax -; Inst 4: movq %rdi, %rcx -; Inst 5: sarq %cl, %rsi -; Inst 6: movq %rdi, %rcx -; Inst 7: sarq %cl, %rax -; Inst 8: uninit %xmm0 -; Inst 9: pinsrd.w $0, %rsi, %xmm0 -; Inst 10: pinsrd.w $1, %rax, %xmm0 -; Inst 11: movq %rbp, %rsp -; Inst 12: popq %rbp -; Inst 13: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; pextrd.w $0, %xmm0, %r11 +; pextrd.w $1, %xmm0, %rdi +; movq %rax, %rcx +; sarq %cl, %r11, %r11 +; sarq %cl, %rdi, %rdi +; uninit %xmm0 +; pinsrd.w $0, %xmm0, %r11, %xmm0 +; pinsrd.w $1, %xmm0, %rdi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif index 58cd0db90e..6a99987e29 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif @@ -8,20 +8,15 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pcmpeqd %xmm1, %xmm0 -; Inst 3: pcmpeqd %xmm1, %xmm1 -; Inst 4: pxor %xmm1, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pcmpeqd %xmm0, %xmm0, %xmm1 +; pcmpeqd %xmm7, %xmm7, %xmm7 +; pxor %xmm0, %xmm0, %xmm7 +; movq %rbp, %rsp +; popq %rbp +; ret function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 { block0(v0: i32x4, v1: i32x4): @@ -29,21 +24,16 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pmaxud %xmm1, %xmm0 -; Inst 3: pcmpeqd %xmm1, %xmm0 -; Inst 4: pcmpeqd %xmm1, %xmm1 -; Inst 5: pxor %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pmaxud %xmm0, %xmm0, %xmm1 +; pcmpeqd %xmm0, %xmm0, %xmm1 +; pcmpeqd %xmm9, %xmm9, %xmm9 +; pxor %xmm0, %xmm0, %xmm9 +; movq %rbp, %rsp +; popq %rbp +; ret function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 { block0(v0: i16x8, v1: i16x8): @@ -51,21 +41,15 @@ block0(v0: i16x8, v1: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm1, %xmm2 -; Inst 3: movdqa %xmm0, %xmm1 -; Inst 4: pmaxsw %xmm2, %xmm1 -; Inst 5: pcmpeqw %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm5 +; pmaxsw %xmm5, %xmm5, %xmm1 +; pcmpeqw %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 { block0(v0: i8x16, v1: i8x16): @@ -73,19 +57,13 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm1, %xmm2 -; Inst 3: movdqa %xmm0, %xmm1 -; Inst 4: pmaxub %xmm2, %xmm1 -; Inst 5: pcmpeqb %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm5 +; pmaxub %xmm5, %xmm5, %xmm1 +; pcmpeqb %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index e2cdd4dbe1..9d4b7b572e 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -12,24 +12,19 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(3), %xmm1 -; Inst 3: load_const VCodeConstant(2), %xmm0 -; Inst 4: load_const VCodeConstant(0), %xmm2 -; Inst 5: pshufb %xmm2, %xmm1 -; Inst 6: load_const VCodeConstant(1), %xmm2 -; Inst 7: pshufb %xmm2, %xmm0 -; Inst 8: orps %xmm1, %xmm0 -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(3), %xmm1 +; load_const VCodeConstant(2), %xmm0 +; load_const VCodeConstant(0), %xmm9 +; pshufb %xmm1, %xmm1, %xmm9 +; load_const VCodeConstant(1), %xmm12 +; pshufb %xmm0, %xmm0, %xmm12 +; orps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %shuffle_same_ssa_value() -> i8x16 { block0: @@ -38,20 +33,15 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm0 -; Inst 3: load_const VCodeConstant(0), %xmm1 -; Inst 4: pshufb %xmm1, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; load_const VCodeConstant(0), %xmm5 +; pshufb %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %swizzle() -> i8x16 { block0: @@ -61,23 +51,17 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm1 -; Inst 3: load_const VCodeConstant(1), %xmm0 -; Inst 4: load_const VCodeConstant(0), %xmm2 -; Inst 5: paddusb %xmm2, %xmm0 -; Inst 6: pshufb %xmm0, %xmm1 -; Inst 7: movdqa %xmm1, %xmm0 -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; load_const VCodeConstant(1), %xmm2 +; load_const VCodeConstant(0), %xmm7 +; paddusb %xmm2, %xmm2, %xmm7 +; pshufb %xmm0, %xmm0, %xmm2 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_i8(i8) -> i8x16 { block0(v0: i8): @@ -85,21 +69,16 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: uninit %xmm0 -; Inst 3: pinsrb $0, %rdi, %xmm0 -; Inst 4: pxor %xmm1, %xmm1 -; Inst 5: pshufb %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; uninit %xmm0 +; pinsrb $0, %xmm0, %rdi, %xmm0 +; pxor %xmm6, %xmm6, %xmm6 +; pshufb %xmm0, %xmm0, %xmm6 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_b16() -> b16x8 { block0: @@ -108,22 +87,17 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $65535, %esi -; Inst 3: uninit %xmm0 -; Inst 4: pinsrw $0, %rsi, %xmm0 -; Inst 5: pinsrw $1, %rsi, %xmm0 -; Inst 6: pshufd $0, %xmm0, %xmm0 -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $65535, %eax +; uninit %xmm0 +; pinsrw $0, %xmm0, %rax, %xmm0 +; pinsrw $1, %xmm0, %rax, %xmm0 +; pshufd $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_i32(i32) -> i32x4 { block0(v0: i32): @@ -131,20 +105,15 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: uninit %xmm0 -; Inst 3: pinsrd $0, %rdi, %xmm0 -; Inst 4: pshufd $0, %xmm0, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; uninit %xmm0 +; pinsrd $0, %xmm0, %rdi, %xmm0 +; pshufd $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_f64(f64) -> f64x2 { block0(v0: f64): @@ -152,21 +121,17 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: uninit %xmm1 -; Inst 3: movsd %xmm0, %xmm1 -; Inst 4: movlhps %xmm0, %xmm1 -; Inst 5: movdqa %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm4 +; uninit %xmm0 +; movdqa %xmm4, %xmm5 +; movsd %xmm0, %xmm0, %xmm5 +; movlhps %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %load32_zero_coalesced(i64) -> i32x4 { block0(v0: i64): @@ -175,18 +140,13 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movss 0(%rdi), %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movss 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %load32_zero_int(i32) -> i32x4 { block0(v0: i32): @@ -194,18 +154,13 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movd %edi, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %edi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %load32_zero_float(f32) -> f32x4 { block0(v0: f32): @@ -213,15 +168,10 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rbp, %rsp -; Inst 3: popq %rbp -; Inst 4: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index 72fc41e34c..42849a3987 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -8,19 +8,14 @@ block0(v0: b32x4): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pcmpeqd %xmm1, %xmm1 -; Inst 3: pxor %xmm1, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pcmpeqd %xmm3, %xmm3, %xmm3 +; pxor %xmm0, %xmm0, %xmm3 +; movq %rbp, %rsp +; popq %rbp +; ret function %vany_true_b32x4(b32x4) -> b1 { block0(v0: b32x4): @@ -28,20 +23,14 @@ block0(v0: b32x4): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: ptest %xmm0, %xmm0 -; Inst 3: setnz %sil -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ptest %xmm0, %xmm0 +; setnz %al +; movq %rbp, %rsp +; popq %rbp +; ret function %vall_true_i64x2(i64x2) -> b1 { block0(v0: i64x2): @@ -49,20 +38,14 @@ block0(v0: i64x2): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pxor %xmm1, %xmm1 -; Inst 3: pcmpeqq %xmm0, %xmm1 -; Inst 4: ptest %xmm1, %xmm1 -; Inst 5: setz %sil -; Inst 6: movq %rsi, %rax -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pxor %xmm4, %xmm4, %xmm4 +; pcmpeqq %xmm4, %xmm4, %xmm0 +; ptest %xmm4, %xmm4 +; setz %al +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif deleted file mode 100644 index c20f816fc2..0000000000 --- a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif +++ /dev/null @@ -1,127 +0,0 @@ -test compile -target x86_64 - -;; The goal of this test is to ensure that stack spills of an integer value, -;; which width is less than the machine word's size, cause the full word to be -;; stored, and not only the lower bits. - -;; Because of unsigned extensions which can be transformed into simple moves, -;; the source vreg of the extension operation can be coalesced with its -;; destination vreg, and if it happens to be spill, then the reload may use a -;; reload of a different, larger size. - -function %f0(i32, i32, i32) -> i64 { - fn0 = %g(i32) -> i64 - -; check: pushq %rbp -; nextln: movq %rsp, %rbp -; nextln: subq $$64, %rsp - -;; Stash all the callee-saved registers. - -; nextln: movq %r12, 16(%rsp) -; nextln: movq %r13, 24(%rsp) -; nextln: movq %r14, 32(%rsp) -; nextln: movq %rbx, 40(%rsp) -; nextln: movq %r15, 48(%rsp) - -block0(v0: i32, v1: i32, v2: i32): - ;; First, create enough virtual registers so that the call instructions - ;; causes at least one of them to be spilled onto the stack. 
- - v3 = iadd.i32 v0, v1 - v4 = iadd.i32 v1, v2 - v5 = iadd.i32 v0, v2 - v6 = iadd.i32 v3, v0 - v7 = iadd.i32 v4, v0 - v8 = iadd.i32 v5, v0 - -; nextln: movq %rdi, %r12 -; nextln: addl %esi, %r12d -; nextln: movq %rsi, %r13 -; nextln: addl %edx, %r13d -; nextln: movq %rdi, %r14 -; nextln: addl %edx, %r14d -; nextln: movq %r12, %rbx -; nextln: addl %edi, %ebx -; nextln: movq %r13, %r15 -; nextln: addl %edi, %r15d -; nextln: movq %r14, %rsi - -;; This should be movq below, not movl. -; nextln: movq %rsi, rsp(0 + virtual offset) - -; nextln: movq rsp(0 + virtual offset), %rsi -; nextln: addl %edi, %esi - - ;; Put an effectful instruction so that the live-ranges of the adds and - ;; uextends are split here, and to prevent the uextend to be emitted - ;; before the call. This will effectively causing the above i32 to be - ;; spilled as an i32, and not a full i64. - - v300 = call fn0(v0) - -;; This should be movq below, not movl. -; nextln: movq %rsi, rsp(0 + virtual offset) - -; nextln: load_ext_name %g+0, %rsi -; nextln: call *%rsi - - v31 = uextend.i64 v3 - v41 = uextend.i64 v4 - v51 = uextend.i64 v5 - v61 = uextend.i64 v6 - v71 = uextend.i64 v7 - v81 = uextend.i64 v8 - - ;; None of the uextends are generated here yet. - - ;; At this point, I'd expect that this second call below would be not - ;; necessary, but if it is removed, the uextend is applied before the call, - ;; and the i64 is spilled (then reloaded), causing the bug to not appear. So - ;; an additional call it is! - - v100 = call fn0(v3) - -; nextln: movq %r12, %rsi -; nextln: movq %rsi, rsp(8 + virtual offset) -; nextln: nop len=0 -; nextln: movq %r12, %rdi -; nextln: load_ext_name %g+0, %rsi -; nextln: call *%rsi - - ;; Cause reloads of all the values. Most are in registers, but one of them - ;; is on the stack. Make sure they're all used in the final computation. - - v101 = iadd.i64 v100, v31 - v102 = iadd.i64 v101, v41 - v103 = iadd.i64 v102, v51 - v104 = iadd.i64 v103, v61 - v105 = iadd.i64 v104, v71 - v200 = iadd.i64 v105, v81 - -; nextln: movq %rax, %rsi -; nextln: movq rsp(8 + virtual offset), %rdi -; nextln: addq %rdi, %rsi -; nextln: addq %r13, %rsi -; nextln: addq %r14, %rsi -; nextln: addq %rbx, %rsi -; nextln: addq %r15, %rsi - -;; The reload operates on a full word, so uses movq. -; nextln: movq rsp(0 + virtual offset), %rdi - -; nextln: addq %rdi, %rsi -; nextln: movq %rsi, %rax -; nextln: movq 16(%rsp), %r12 -; nextln: movq 24(%rsp), %r13 -; nextln: movq 32(%rsp), %r14 -; nextln: movq 40(%rsp), %rbx -; nextln: movq 48(%rsp), %r15 -; nextln: addq $$64, %rsp - - return v200 -; nextln: movq %rbp, %rsp -; nextln: popq %rbp -; nextln: ret -} diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index b4094abf5c..9076630171 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -7,20 +7,14 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lea 16(%rbp), %rsi -; Inst 3: movzbq 0(%rsi), %rsi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 16(%rbp), %rsi +; movzbq 0(%rsi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function u0:1(i64 sarg(64), i64) -> i8 system_v { block0(v0: i64, v1: i64): @@ -30,22 +24,16 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lea 16(%rbp), %rsi -; Inst 3: movzbq 0(%rdi), %rdi -; Inst 4: movzbq 0(%rsi), %rsi -; Inst 5: addl %esi, %edi -; Inst 6: movq %rdi, %rax -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 16(%rbp), %rsi +; movzbq 0(%rdi), %rax +; movzbq 0(%rsi), %r11 +; addl %eax, %r11d, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function u0:2(i64) -> i8 system_v { fn1 = colocated u0:0(i64 sarg(64)) -> i8 system_v @@ -55,27 +43,23 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: subq $64, %rsp -; Inst 4: virtual_sp_offset_adjust 64 -; Inst 5: lea 0(%rsp), %rdi -; Inst 6: movl $64, %edx -; Inst 7: load_ext_name %Memcpy+0, %rcx -; Inst 8: call *%rcx -; Inst 9: call User { namespace: 0, index: 0 } -; Inst 10: addq $64, %rsp -; Inst 11: virtual_sp_offset_adjust -64 -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %r8 +; subq %rsp, $64, %rsp +; virtual_sp_offset_adjust 64 +; lea 0(%rsp), %rdi +; movq %r8, %rsi +; movl $64, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; call User { namespace: 0, index: 0 } +; addq %rsp, $64, %rsp +; virtual_sp_offset_adjust -64 +; movq %rbp, %rsp +; popq %rbp +; ret function u0:3(i64, i64) -> i8 system_v { fn1 = colocated u0:0(i64, i64 sarg(64)) -> i8 system_v @@ -85,32 +69,27 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
20) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: subq $64, %rsp -; Inst 6: virtual_sp_offset_adjust 64 -; Inst 7: lea 0(%rsp), %rdi -; Inst 8: movl $64, %edx -; Inst 9: load_ext_name %Memcpy+0, %rcx -; Inst 10: call *%rcx -; Inst 11: movq %r12, %rdi -; Inst 12: call User { namespace: 0, index: 0 } -; Inst 13: addq $64, %rsp -; Inst 14: virtual_sp_offset_adjust -64 -; Inst 15: movq 0(%rsp), %r12 -; Inst 16: addq $16, %rsp -; Inst 17: movq %rbp, %rsp -; Inst 18: popq %rbp -; Inst 19: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r12, 0(%rsp) +; block0: +; movq %rdi, %r12 +; subq %rsp, $64, %rsp +; virtual_sp_offset_adjust 64 +; lea 0(%rsp), %rdi +; movl $64, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; movq %r12, %rdi +; call User { namespace: 0, index: 0 } +; addq %rsp, $64, %rsp +; virtual_sp_offset_adjust -64 +; movq 0(%rsp), %r12 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function u0:4(i64 sarg(128), i64 sarg(64)) -> i8 system_v { block0(v0: i64, v1: i64): @@ -120,23 +99,17 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lea 16(%rbp), %rsi -; Inst 3: lea 144(%rbp), %rdi -; Inst 4: movzbq 0(%rsi), %rsi -; Inst 5: movzbq 0(%rdi), %rdi -; Inst 6: addl %edi, %esi -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 16(%rbp), %rsi +; lea 144(%rbp), %rdi +; movzbq 0(%rsi), %rax +; movzbq 0(%rdi), %r11 +; addl %eax, %r11d, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function u0:5(i64, i64, i64) -> i8 system_v { fn1 = colocated u0:0(i64, i64 sarg(128), i64 sarg(64)) -> i8 system_v @@ -146,38 +119,33 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
28) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %rdi, %r12 -; Inst 6: movq %rdx, %r13 -; Inst 7: subq $192, %rsp -; Inst 8: virtual_sp_offset_adjust 192 -; Inst 9: lea 0(%rsp), %rdi -; Inst 10: movl $128, %edx -; Inst 11: load_ext_name %Memcpy+0, %rcx -; Inst 12: call *%rcx -; Inst 13: lea 128(%rsp), %rdi -; Inst 14: movq %r13, %rsi -; Inst 15: movl $64, %edx -; Inst 16: load_ext_name %Memcpy+0, %rcx -; Inst 17: call *%rcx -; Inst 18: movq %r12, %rdi -; Inst 19: call User { namespace: 0, index: 0 } -; Inst 20: addq $192, %rsp -; Inst 21: virtual_sp_offset_adjust -192 -; Inst 22: movq 0(%rsp), %r12 -; Inst 23: movq 8(%rsp), %r13 -; Inst 24: addq $16, %rsp -; Inst 25: movq %rbp, %rsp -; Inst 26: popq %rbp -; Inst 27: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; movq %r14, 8(%rsp) +; block0: +; movq %rdi, %r14 +; movq %rdx, %rbx +; subq %rsp, $192, %rsp +; virtual_sp_offset_adjust 192 +; lea 0(%rsp), %rdi +; movl $128, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; lea 128(%rsp), %rdi +; movq %rbx, %rsi +; movl $64, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; movq %r14, %rdi +; call User { namespace: 0, index: 0 } +; addq %rsp, $192, %rsp +; virtual_sp_offset_adjust -192 +; movq 0(%rsp), %rbx +; movq 8(%rsp), %r14 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index 0ecda5ec32..a131363569 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -8,18 +8,13 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movl $42, %esi -; Inst 4: movq %rsi, 0(%rdi) -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $42, %edx +; movq %rdx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif index 5f9333772b..58b21308da 100644 --- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif +++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif @@ -10,16 +10,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: elf_tls_get_addr User { namespace: 1, index: 0 } -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; elf_tls_get_addr User { namespace: 1, index: 0 } +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index e69d941c54..1f88ad6538 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -8,17 +8,12 @@ block0(v0: i32, v1: i32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: addl %esi, %edi -; Inst 3: movq %rdi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addl %edi, %esi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif index 3840689b9b..7160805ddb 100644 --- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif +++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif @@ -13,15 +13,10 @@ block1: trap unreachable } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rbp, %rsp -; Inst 3: popq %rbp -; Inst 4: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rsp +; popq %rbp +; ret
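
A note on the regenerated expectations above, not part of the patch itself: with regalloc2, the x64 VCode printer emits the ABI prologue (`pushq %rbp; movq %rsp, %rbp`) ahead of the `block0:` label, and two-address instructions are printed in a three-operand form in which the tied source and the destination both appear explicitly, so a tied pair surfaces as a repeated register (for example `shrl $1, %r8d, %r8d` or `andl %r8d, %r9d, %r8d`). The tie itself is expressed to the allocator as a "reuse" constraint on the defining operand. Below is a minimal sketch of that constraint, assuming the `Operand` constructors from the regalloc2 0.1 API (`reg_use`, `reg_reuse_def`, `VReg::new`, `RegClass::Int`); the function and vreg indices are hypothetical, for illustration only.

use regalloc2::{Operand, RegClass, VReg};

// Hypothetical operand list for a two-address instruction such as `addl`:
// two read-only uses plus a def that must share its register with the
// first source, since x86 `addl` overwrites that source in place.
fn add32_operands() -> Vec<Operand> {
    let src1 = VReg::new(0, RegClass::Int);
    let src2 = VReg::new(1, RegClass::Int);
    let dst = VReg::new(2, RegClass::Int);
    vec![
        Operand::reg_use(src1), // read-only use
        Operand::reg_use(src2), // read-only use
        // The def is constrained to reuse the allocation given to
        // operand index 0 (src1).
        Operand::reg_reuse_def(dst, 0),
    ]
}

When the allocator honors the reuse constraint, both sides of the tie receive the same physical register, which is exactly the repetition visible in the listings above.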