diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 83219d42e6..8bf10759c4 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -34,7 +34,7 @@ regalloc = "0.0.17" cranelift-codegen-meta = { path = "meta", version = "0.62.0" } [features] -default = ["std", "unwind", "all-arch"] +default = ["std", "unwind"] # The "std" feature enables use of libstd. The "core" feature enables use # of some minimal std-like replacement libraries. At least one of these two diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 2c3a84509e..0fe5b38ad0 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -180,8 +180,7 @@ impl Context { } if let Some(backend) = isa.get_mach_backend() { - let func = std::mem::replace(&mut self.func, Function::new()); - let result = backend.compile_function(func, self.want_disasm)?; + let result = backend.compile_function(&mut self.func, self.want_disasm)?; let info = result.code_info(); self.mach_compile_result = Some(result); Ok(info) @@ -312,15 +311,15 @@ impl Context { /// Run the legalizer for `isa` on the function. pub fn legalize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { + // Legalization invalidates the domtree and loop_analysis by mutating the CFG. + // TODO: Avoid doing this when legalization doesn't actually mutate the CFG. + self.domtree.clear(); + self.loop_analysis.clear(); if isa.get_mach_backend().is_some() { // Run some specific legalizations only. simple_legalize(&mut self.func, &mut self.cfg, isa); - Ok(()) + self.verify_if(isa) } else { - // Legalization invalidates the domtree and loop_analysis by mutating the CFG. - // TODO: Avoid doing this when legalization doesn't actually mutate the CFG. - self.domtree.clear(); - self.loop_analysis.clear(); legalize_function(&mut self.func, &mut self.cfg, isa); debug!("Legalized:\n{}", self.func.display(isa)); self.verify_if(isa) diff --git a/cranelift/codegen/src/dce.rs b/cranelift/codegen/src/dce.rs index 827ae98ec4..e3e855806d 100644 --- a/cranelift/codegen/src/dce.rs +++ b/cranelift/codegen/src/dce.rs @@ -6,48 +6,10 @@ use crate::cursor::{Cursor, FuncCursor}; use crate::dominator_tree::DominatorTree; use crate::entity::EntityRef; -use crate::ir::instructions::InstructionData; -use crate::ir::{DataFlowGraph, Function, Inst, Opcode}; +use crate::inst_predicates::{any_inst_results_used, has_side_effect}; +use crate::ir::Function; use crate::timing; -/// Test whether the given opcode is unsafe to even consider for DCE. -fn trivially_unsafe_for_dce(opcode: Opcode) -> bool { - opcode.is_call() - || opcode.is_branch() - || opcode.is_terminator() - || opcode.is_return() - || opcode.can_trap() - || opcode.other_side_effects() - || opcode.can_store() -} - -/// Preserve instructions with used result values. -fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool { - dfg.inst_results(inst).iter().any(|v| live[v.index()]) -} - -/// Load instructions without the `notrap` flag are defined to trap when -/// operating on inaccessible memory, so we can't DCE them even if the -/// loaded value is unused. -fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool { - if !opcode.can_load() { - return false; - } - match *data { - InstructionData::StackLoad { .. } => false, - InstructionData::Load { flags, .. 
} => !flags.notrap(), - _ => true, - } -} - -/// Does the given instruction have any side-effect that would preclude it from being removed when -/// its value is unused? -pub fn has_side_effect(func: &Function, inst: Inst) -> bool { - let data = &func.dfg[inst]; - let opcode = data.opcode(); - trivially_unsafe_for_dce(opcode) || is_load_with_defined_trapping(opcode, data) -} - /// Perform DCE on `func`. pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) { let _tt = timing::dce(); diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs new file mode 100644 index 0000000000..9cefbc38f9 --- /dev/null +++ b/cranelift/codegen/src/inst_predicates.rs @@ -0,0 +1,42 @@ +//! Instruction predicates/properties, shared by various analyses. + +use crate::ir::{DataFlowGraph, Function, Inst, InstructionData, Opcode}; +use cranelift_entity::EntityRef; + +/// Preserve instructions with used result values. +pub fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool { + dfg.inst_results(inst).iter().any(|v| live[v.index()]) +} + +/// Test whether the given opcode is unsafe to even consider as side-effect-free. +fn trivially_has_side_effects(opcode: Opcode) -> bool { + opcode.is_call() + || opcode.is_branch() + || opcode.is_terminator() + || opcode.is_return() + || opcode.can_trap() + || opcode.other_side_effects() + || opcode.can_store() +} + +/// Load instructions without the `notrap` flag are defined to trap when +/// operating on inaccessible memory, so we can't treat them as side-effect-free even if the loaded +/// value is unused. +fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool { + if !opcode.can_load() { + return false; + } + match *data { + InstructionData::StackLoad { .. } => false, + InstructionData::Load { flags, .. } => !flags.notrap(), + _ => true, + } +} + +/// Does the given instruction have any side-effect that would preclude it from being removed when +/// its value is unused? +pub fn has_side_effect(func: &Function, inst: Inst) -> bool { + let data = &func.dfg[inst]; + let opcode = data.opcode(); + trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data) +} diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs index 7e3cf71956..4a3829780b 100644 --- a/cranelift/codegen/src/ir/function.rs +++ b/cranelift/codegen/src/ir/function.rs @@ -3,8 +3,6 @@ //! The `Function` struct defined in this module owns all of its basic blocks and //! instructions. -#![allow(unused_imports)] - use crate::binemit::CodeOffset; use crate::entity::{PrimaryMap, SecondaryMap}; use crate::ir; @@ -19,7 +17,6 @@ use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa}; use crate::regalloc::{EntryRegDiversions, RegDiversions}; use crate::value_label::ValueLabelsRanges; use crate::write::write_function; -use alloc::boxed::Box; use core::fmt; /// A function. diff --git a/cranelift/codegen/src/isa/arm64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs similarity index 92% rename from cranelift/codegen/src/isa/arm64/abi.rs rename to cranelift/codegen/src/isa/aarch64/abi.rs index 13abb6233a..88aa60f8af 100644 --- a/cranelift/codegen/src/isa/arm64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1,11 +1,11 @@ -//! Implementation of the standard ARM64 ABI. +//! Implementation of the standard AArch64 ABI. 
use crate::ir; use crate::ir::types; use crate::ir::types::*; use crate::ir::StackSlot; use crate::isa; -use crate::isa::arm64::inst::*; +use crate::isa::aarch64::inst::*; use crate::machinst::*; use crate::settings; @@ -15,19 +15,16 @@ use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use log::debug; -// A location for an argument or return value. -#[derive(Clone, Debug)] +/// A location for an argument or return value. +#[derive(Clone, Copy, Debug)] enum ABIArg { - // In a real register. + /// In a real register. Reg(RealReg, ir::Type), - // Arguments only: on stack, at given offset from SP at entry. + /// Arguments only: on stack, at given offset from SP at entry. Stack(i64, ir::Type), - // (first and only) return value only: in memory pointed to by x8 on entry. - #[allow(dead_code)] - RetMem(ir::Type), } -/// ARM64 ABI information shared between body (callee) and caller. +/// AArch64 ABI information shared between body (callee) and caller. struct ABISig { args: Vec, rets: Vec, @@ -161,11 +158,6 @@ impl ABISig { let (args, stack_arg_space) = compute_arg_locs(sig.call_conv, &sig.params); let (rets, _) = compute_arg_locs(sig.call_conv, &sig.returns); - // Verify that there are no arguments in return-memory area. - assert!(args.iter().all(|a| match a { - &ABIArg::RetMem(..) => false, - _ => true, - })); // Verify that there are no return values on the stack. assert!(rets.iter().all(|a| match a { &ABIArg::Stack(..) => false, @@ -181,14 +173,21 @@ impl ABISig { } } -/// ARM64 ABI object for a function body. -pub struct ARM64ABIBody { - sig: ABISig, // signature: arg and retval regs - stackslots: Vec, // offsets to each stackslot - stackslots_size: usize, // total stack size of all stackslots - clobbered: Set>, // clobbered registers, from regalloc. - spillslots: Option, // total number of spillslots, from regalloc. - frame_size: Option, +/// AArch64 ABI object for a function body. +pub struct AArch64ABIBody { + /// signature: arg and retval regs + sig: ABISig, + /// offsets to each stackslot + stackslots: Vec, + /// total stack size of all stackslots + stackslots_size: u32, + /// clobbered registers, from regalloc. + clobbered: Set>, + /// total number of spillslots, from regalloc. + spillslots: Option, + /// Total frame size. + frame_size: Option, + /// Calling convention this function expects. call_conv: isa::CallConv, } @@ -207,20 +206,31 @@ fn in_vec_reg(ty: ir::Type) -> bool { } } -impl ARM64ABIBody { +impl AArch64ABIBody { /// Create a new body ABI instance. pub fn new(f: &ir::Function) -> Self { - debug!("ARM64 ABI: func signature {:?}", f.signature); + debug!("AArch64 ABI: func signature {:?}", f.signature); let sig = ABISig::from_func_sig(&f.signature); + let call_conv = f.signature.call_conv; + // Only these calling conventions are supported. + assert!( + call_conv == isa::CallConv::SystemV + || call_conv == isa::CallConv::Fast + || call_conv == isa::CallConv::Cold + || call_conv.extends_baldrdash(), + "Unsupported calling convention: {:?}", + call_conv + ); + // Compute stackslot locations and total stackslot size. 
- let mut stack_offset: usize = 0; + let mut stack_offset: u32 = 0; let mut stackslots = vec![]; for (stackslot, data) in f.stack_slots.iter() { let off = stack_offset; - stack_offset += data.size as usize; - stack_offset = (stack_offset + 7) & !7usize; + stack_offset += data.size; + stack_offset = (stack_offset + 7) & !7; assert_eq!(stackslot.as_u32() as usize, stackslots.len()); stackslots.push(off); } @@ -232,7 +242,7 @@ impl ARM64ABIBody { clobbered: Set::empty(), spillslots: None, frame_size: None, - call_conv: f.signature.call_conv, + call_conv, } } } @@ -264,7 +274,7 @@ fn load_stack(fp_offset: i64, into_reg: Writable, ty: Type) -> Inst { mem, srcloc: None, }, - _ => unimplemented!(), + _ => unimplemented!("load_stack({})", ty), } } @@ -295,7 +305,7 @@ fn store_stack(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst { mem, srcloc: None, }, - _ => unimplemented!(), + _ => unimplemented!("store_stack({})", ty), } } @@ -402,11 +412,13 @@ fn get_caller_saves_set(call_conv: isa::CallConv) -> Set> { set } -impl ABIBody for ARM64ABIBody { +impl ABIBody for AArch64ABIBody { + type I = Inst; + fn liveins(&self) -> Set { let mut set: Set = Set::empty(); - for arg in &self.sig.args { - if let &ABIArg::Reg(r, _) = arg { + for &arg in &self.sig.args { + if let ABIArg::Reg(r, _) = arg { set.insert(r); } } @@ -415,8 +427,8 @@ impl ABIBody for ARM64ABIBody { fn liveouts(&self) -> Set { let mut set: Set = Set::empty(); - for ret in &self.sig.rets { - if let &ABIArg::Reg(r, _) = ret { + for &ret in &self.sig.rets { + if let ABIArg::Reg(r, _) = ret { set.insert(r); } } @@ -439,7 +451,6 @@ impl ABIBody for ARM64ABIBody { match &self.sig.args[idx] { &ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty), &ABIArg::Stack(off, ty) => load_stack(off + 16, into_reg, ty), - _ => unimplemented!(), } } @@ -447,7 +458,6 @@ impl ABIBody for ARM64ABIBody { match &self.sig.rets[idx] { &ABIArg::Reg(r, ty) => Inst::gen_move(Writable::from_reg(r.to_reg()), from_reg, ty), &ABIArg::Stack(off, ty) => store_stack(off + 16, from_reg, ty), - _ => unimplemented!(), } } @@ -470,7 +480,7 @@ impl ABIBody for ARM64ABIBody { fn load_stackslot( &self, slot: StackSlot, - offset: usize, + offset: u32, ty: Type, into_reg: Writable, ) -> Inst { @@ -480,7 +490,7 @@ impl ABIBody for ARM64ABIBody { load_stack(fp_off, into_reg, ty) } - fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> Inst { + fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst { // Offset from beginning of stackslot area, which is at FP - stackslots_size. let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); @@ -532,13 +542,13 @@ impl ABIBody for ARM64ABIBody { }); } - let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap(); + let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap() as u32; if self.call_conv.extends_baldrdash() { debug_assert!( !flags.enable_probestack(), "baldrdash does not expect cranelift to emit stack probes" ); - total_stacksize += flags.baldrdash_prologue_words() as usize * 8; + total_stacksize += flags.baldrdash_prologue_words() as u32 * 8; } let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack. 
@@ -692,7 +702,7 @@ impl ABIBody for ARM64ABIBody { fn frame_size(&self) -> u32 { self.frame_size - .expect("frame size not computed before prologue generation") as u32 + .expect("frame size not computed before prologue generation") } fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { @@ -719,8 +729,8 @@ enum CallDest { Reg(Reg), } -/// ARM64 ABI object for a function call. -pub struct ARM64ABICall { +/// AArch64 ABI object for a function call. +pub struct AArch64ABICall { sig: ABISig, uses: Set, defs: Set>, @@ -751,16 +761,16 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Set, Set>) { (uses, defs) } -impl ARM64ABICall { +impl AArch64ABICall { /// Create a callsite ABI object for a call directly to the specified function. pub fn from_func( sig: &ir::Signature, extname: &ir::ExternalName, loc: ir::SourceLoc, - ) -> ARM64ABICall { + ) -> AArch64ABICall { let sig = ABISig::from_func_sig(sig); let (uses, defs) = abisig_to_uses_and_defs(&sig); - ARM64ABICall { + AArch64ABICall { sig, uses, defs, @@ -777,10 +787,10 @@ impl ARM64ABICall { ptr: Reg, loc: ir::SourceLoc, opcode: ir::Opcode, - ) -> ARM64ABICall { + ) -> AArch64ABICall { let sig = ABISig::from_func_sig(sig); let (uses, defs) = abisig_to_uses_and_defs(&sig); - ARM64ABICall { + AArch64ABICall { sig, uses, defs, @@ -820,7 +830,9 @@ fn adjust_stack(amt: u64, is_sub: bool) -> Vec { } } -impl ABICall for ARM64ABICall { +impl ABICall for AArch64ABICall { + type I = Inst; + fn num_args(&self) -> usize { self.sig.args.len() } @@ -841,14 +853,12 @@ impl ABICall for ARM64ABICall { mem: MemArg::SPOffset(off), srcloc: None, }, - _ => unimplemented!(), } } fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable) -> Inst { match &self.sig.rets[idx] { &ABIArg::Reg(reg, ty) => Inst::gen_move(into_reg, reg.to_reg(), ty), - &ABIArg::RetMem(..) => panic!("Return-memory area not yet supported"), _ => unimplemented!(), } } diff --git a/cranelift/codegen/src/isa/arm64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs similarity index 78% rename from cranelift/codegen/src/isa/arm64/inst/args.rs rename to cranelift/codegen/src/isa/aarch64/inst/args.rs index 75cf12283b..b83f375bcf 100644 --- a/cranelift/codegen/src/isa/arm64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -1,49 +1,34 @@ -//! ARM64 ISA definitions: instruction arguments. +//! AArch64 ISA definitions: instruction arguments. +// Some variants are never constructed, but we still want them as options in the future. #![allow(dead_code)] -#![allow(non_snake_case)] -use crate::binemit::{CodeOffset, CodeSink}; -use crate::ir::constant::{ConstantData, ConstantOffset}; +use crate::binemit::CodeOffset; use crate::ir::Type; -use crate::isa::arm64::inst::*; -use crate::machinst::*; +use crate::isa::aarch64::inst::*; -use regalloc::{ - RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, - NUM_REG_CLASSES, -}; +use regalloc::{RealRegUniverse, Reg, Writable}; -use std::string::{String, ToString}; +use core::convert::{Into, TryFrom}; +use std::string::String; /// A shift operator for a register or immediate. #[derive(Clone, Copy, Debug)] +#[repr(u8)] pub enum ShiftOp { - ASR, - LSR, - LSL, - ROR, + LSL = 0b00, + LSR = 0b01, + ASR = 0b10, + ROR = 0b11, } impl ShiftOp { /// Get the encoding of this shift op. 
- pub fn bits(&self) -> u8 { - match self { - &ShiftOp::LSL => 0b00, - &ShiftOp::LSR => 0b01, - &ShiftOp::ASR => 0b10, - &ShiftOp::ROR => 0b11, - } + pub fn bits(self) -> u8 { + self as u8 } } -/// A shift operator with an amount, guaranteed to be within range. -#[derive(Clone, Debug)] -pub struct ShiftOpAndAmt { - op: ShiftOp, - shift: ShiftOpShiftImm, -} - /// A shift operator amount. #[derive(Clone, Copy, Debug)] pub struct ShiftOpShiftImm(u8); @@ -62,11 +47,18 @@ impl ShiftOpShiftImm { } /// Return the shift amount. - pub fn value(&self) -> u8 { + pub fn value(self) -> u8 { self.0 } } +/// A shift operator with an amount, guaranteed to be within range. +#[derive(Clone, Debug)] +pub struct ShiftOpAndAmt { + op: ShiftOp, + shift: ShiftOpShiftImm, +} + impl ShiftOpAndAmt { pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt { ShiftOpAndAmt { op, shift } @@ -74,7 +66,7 @@ impl ShiftOpAndAmt { /// Get the shift op. pub fn op(&self) -> ShiftOp { - self.op.clone() + self.op } /// Get the shift amount. @@ -85,30 +77,22 @@ impl ShiftOpAndAmt { /// An extend operator for a register. #[derive(Clone, Copy, Debug)] +#[repr(u8)] pub enum ExtendOp { - SXTB, - SXTH, - SXTW, - SXTX, - UXTB, - UXTH, - UXTW, - UXTX, + UXTB = 0b000, + UXTH = 0b001, + UXTW = 0b010, + UXTX = 0b011, + SXTB = 0b100, + SXTH = 0b101, + SXTW = 0b110, + SXTX = 0b111, } impl ExtendOp { /// Encoding of this op. - pub fn bits(&self) -> u8 { - match self { - &ExtendOp::UXTB => 0b000, - &ExtendOp::UXTH => 0b001, - &ExtendOp::UXTW => 0b010, - &ExtendOp::UXTX => 0b011, - &ExtendOp::SXTB => 0b100, - &ExtendOp::SXTH => 0b101, - &ExtendOp::SXTW => 0b110, - &ExtendOp::SXTX => 0b111, - } + pub fn bits(self) -> u8 { + self as u8 } } @@ -128,18 +112,34 @@ pub enum MemLabel { #[derive(Clone, Debug)] pub enum MemArg { Label(MemLabel), + /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation. PostIndexed(Writable, SImm9), + /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation. PreIndexed(Writable, SImm9), + // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to // what the ISA calls the "register offset" addressing mode. We split out // several options here for more ergonomic codegen. + /// Register plus register offset. RegReg(Reg, Reg), + + /// Register plus register offset, scaled by type's size. RegScaled(Reg, Reg, Type), + + /// Register plus register offset, scaled by type's size, with index sign- or zero-extended + /// first. RegScaledExtended(Reg, Reg, Type, ExtendOp), + + /// Unscaled signed 9-bit immediate offset from reg. Unscaled(Reg, SImm9), + + /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg. UnsignedOffset(Reg, UImm12Scaled), - /// Offset from the stack pointer or frame pointer. + + /// Offset from the stack pointer. Lowered into a real amode at emission. SPOffset(i64), + + /// Offset from the frame pointer. Lowered into a real amode at emission. FPOffset(i64), } @@ -153,9 +153,7 @@ impl MemArg { /// Memory reference using an address in a register and an offset, if possible. 
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option { - if offset == 0 { - Some(MemArg::Unscaled(reg, SImm9::zero())) - } else if let Some(simm9) = SImm9::maybe_from_i64(offset) { + if let Some(simm9) = SImm9::maybe_from_i64(offset) { Some(MemArg::Unscaled(reg, simm9)) } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) { Some(MemArg::UnsignedOffset(reg, uimm12s)) @@ -165,17 +163,18 @@ impl MemArg { } /// Memory reference using the sum of two registers as an address. - pub fn reg_reg(reg1: Reg, reg2: Reg) -> MemArg { + pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg { MemArg::RegReg(reg1, reg2) } /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. - pub fn reg_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg { + pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg { MemArg::RegScaled(reg1, reg2, ty) } - /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. - pub fn reg_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg { + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or + /// zero-extended as per `op`. + pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg { MemArg::RegScaledExtended(reg1, reg2, ty, op) } @@ -199,23 +198,24 @@ pub enum PairMemArg { /// Condition for conditional branches. #[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] pub enum Cond { - Eq, - Ne, - Hs, - Lo, - Mi, - Pl, - Vs, - Vc, - Hi, - Ls, - Ge, - Lt, - Gt, - Le, - Al, - Nv, + Eq = 0, + Ne = 1, + Hs = 2, + Lo = 3, + Mi = 4, + Pl = 5, + Vs = 6, + Vc = 7, + Hi = 8, + Ls = 9, + Ge = 10, + Lt = 11, + Gt = 12, + Le = 13, + Al = 14, + Nv = 15, } impl Cond { @@ -224,18 +224,25 @@ impl Cond { match self { Cond::Eq => Cond::Ne, Cond::Ne => Cond::Eq, + Cond::Hs => Cond::Lo, Cond::Lo => Cond::Hs, + Cond::Mi => Cond::Pl, Cond::Pl => Cond::Mi, + Cond::Vs => Cond::Vc, Cond::Vc => Cond::Vs, + Cond::Hi => Cond::Ls, Cond::Ls => Cond::Hi, + Cond::Ge => Cond::Lt, Cond::Lt => Cond::Ge, + Cond::Gt => Cond::Le, Cond::Le => Cond::Gt, + Cond::Al => Cond::Nv, Cond::Nv => Cond::Al, } @@ -243,24 +250,7 @@ impl Cond { /// Return the machine encoding of this condition. pub fn bits(self) -> u32 { - match self { - Cond::Eq => 0, - Cond::Ne => 1, - Cond::Hs => 2, - Cond::Lo => 3, - Cond::Mi => 4, - Cond::Pl => 5, - Cond::Vs => 6, - Cond::Vc => 7, - Cond::Hi => 8, - Cond::Ls => 9, - Cond::Ge => 10, - Cond::Lt => 11, - Cond::Gt => 12, - Cond::Le => 13, - Cond::Al => 14, - Cond::Nv => 15, - } + self as u32 } } @@ -305,7 +295,7 @@ impl BranchTarget { pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) { match self { &mut BranchTarget::Block(bix) => { - let bix = bix as usize; + let bix = usize::try_from(bix).unwrap(); assert!(bix < targets.len()); let block_offset_in_func = targets[bix]; let branch_offset = (block_offset_in_func as isize) - (my_offset as isize); @@ -343,7 +333,7 @@ impl BranchTarget { } } - /// Get the offset as a 16-bit offset, or `None` if overflow. + /// Get the offset as a 19-bit offset, or `None` if overflow. 
pub fn as_off19(&self) -> Option { let off = self.as_offset_words(); if (off < (1 << 18)) && (off >= -(1 << 18)) { @@ -357,7 +347,7 @@ impl BranchTarget { pub fn map(&mut self, block_index_map: &[BlockIndex]) { match self { &mut BranchTarget::Block(ref mut bix) => { - let n = block_index_map[*bix as usize]; + let n = block_index_map[usize::try_from(*bix).unwrap()]; *bix = n; } &mut BranchTarget::ResolvedOffset(_) => {} @@ -392,7 +382,7 @@ fn shift_for_type(ty: Type) -> usize { 4 => 2, 8 => 3, 16 => 4, - _ => panic!("unknown type"), + _ => panic!("unknown type: {}", ty), } } @@ -427,15 +417,15 @@ impl ShowWithRRU for MemArg { } &MemArg::RegScaledExtended(r1, r2, ty, op) => { let shift = shift_for_type(ty); - let is32 = match op { - ExtendOp::SXTW | ExtendOp::UXTW => true, - _ => false, + let size = match op { + ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32, + _ => InstSize::Size64, }; let op = op.show_rru(mb_rru); format!( "[{}, {}, {} #{}]", r1.show_rru(mb_rru), - show_ireg_sized(r2, mb_rru, is32), + show_ireg_sized(r2, mb_rru, size), op, shift ) @@ -499,3 +489,40 @@ impl ShowWithRRU for BranchTarget { } } } + +/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and +/// 64-bit variants of many instructions (and integer registers). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum InstSize { + Size32, + Size64, +} + +impl InstSize { + /// 32-bit case? + pub fn is32(self) -> bool { + self == InstSize::Size32 + } + /// 64-bit case? + pub fn is64(self) -> bool { + self == InstSize::Size64 + } + /// Convert from an `is32` boolean flag to an `InstSize`. + pub fn from_is32(is32: bool) -> InstSize { + if is32 { + InstSize::Size32 + } else { + InstSize::Size64 + } + } + /// Convert from a needed width to the smallest size that fits. + pub fn from_bits>(bits: I) -> InstSize { + let bits: usize = bits.into(); + assert!(bits <= 64); + if bits <= 32 { + InstSize::Size32 + } else { + InstSize::Size64 + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs similarity index 97% rename from cranelift/codegen/src/isa/arm64/inst/emit.rs rename to cranelift/codegen/src/isa/aarch64/inst/emit.rs index 20eefdeaae..f01746543c 100644 --- a/cranelift/codegen/src/isa/arm64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1,22 +1,14 @@ -//! ARM64 ISA: binary code emission. +//! AArch64 ISA: binary code emission. -#![allow(dead_code)] -#![allow(non_snake_case)] - -use crate::binemit::{CodeOffset, CodeSink, Reloc}; +use crate::binemit::{CodeOffset, Reloc}; use crate::ir::constant::ConstantData; use crate::ir::types::*; -use crate::ir::{Opcode, TrapCode, Type}; -use crate::isa::arm64::inst::*; -use crate::machinst::*; -use cranelift_entity::EntityRef; +use crate::ir::TrapCode; +use crate::isa::aarch64::inst::*; -use std::env; +use core::convert::TryFrom; -use regalloc::{ - RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, - NUM_REG_CLASSES, -}; +use regalloc::{Reg, RegClass, Writable}; use alloc::vec::Vec; @@ -66,16 +58,7 @@ pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec, MemArg) { /// Helper: get a ConstantData from a u64. 
pub fn u64_constant(bits: u64) -> ConstantData { - let data = [ - (bits & 0xff) as u8, - ((bits >> 8) & 0xff) as u8, - ((bits >> 16) & 0xff) as u8, - ((bits >> 24) & 0xff) as u8, - ((bits >> 32) & 0xff) as u8, - ((bits >> 40) & 0xff) as u8, - ((bits >> 48) & 0xff) as u8, - ((bits >> 56) & 0xff) as u8, - ]; + let data = bits.to_le_bytes(); ConstantData::from(&data[..]) } @@ -84,41 +67,42 @@ pub fn u64_constant(bits: u64) -> ConstantData { fn machreg_to_gpr(m: Reg) -> u32 { assert!(m.get_class() == RegClass::I64); - assert!(m.is_real()); - m.to_real_reg().get_hw_encoding() as u32 + u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() } fn machreg_to_vec(m: Reg) -> u32 { assert!(m.get_class() == RegClass::V128); - assert!(m.is_real()); - m.to_real_reg().get_hw_encoding() as u32 + u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() } fn machreg_to_gpr_or_vec(m: Reg) -> u32 { - m.to_real_reg().get_hw_encoding() as u32 + u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() } -fn enc_arith_rrr(bits_31_21: u16, bits_15_10: u8, rd: Writable, rn: Reg, rm: Reg) -> u32 { - ((bits_31_21 as u32) << 21) - | ((bits_15_10 as u32) << 10) +fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable, rn: Reg, rm: Reg) -> u32 { + (bits_31_21 << 21) + | (bits_15_10 << 10) | machreg_to_gpr(rd.to_reg()) | (machreg_to_gpr(rn) << 5) | (machreg_to_gpr(rm) << 16) } -fn enc_arith_rr_imm12(bits_31_24: u8, immshift: u8, imm12: u16, rn: Reg, rd: Writable) -> u32 { - ((bits_31_24 as u32) << 24) - | ((immshift as u32) << 22) - | ((imm12 as u32) << 10) +fn enc_arith_rr_imm12( + bits_31_24: u32, + immshift: u32, + imm12: u32, + rn: Reg, + rd: Writable, +) -> u32 { + (bits_31_24 << 24) + | (immshift << 22) + | (imm12 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) } -fn enc_arith_rr_imml(bits_31_23: u16, imm_bits: u16, rn: Reg, rd: Writable) -> u32 { - ((bits_31_23 as u32) << 23) - | ((imm_bits as u32) << 10) - | (machreg_to_gpr(rn) << 5) - | machreg_to_gpr(rd.to_reg()) +fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable) -> u32 { + (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) } fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable) -> u32 { @@ -159,8 +143,8 @@ fn enc_move_wide(op: MoveWideOpcode, rd: Writable, imm: MoveWideConst) -> u assert!(imm.shift <= 0b11); MOVE_WIDE_FIXED | (op as u32) << 29 - | (imm.shift as u32) << 21 - | (imm.bits as u32) << 5 + | u32::from(imm.shift) << 21 + | u32::from(imm.bits) << 5 | machreg_to_gpr(rd.to_reg()) } @@ -201,7 +185,7 @@ fn enc_ldst_reg( Some(ExtendOp::UXTW) => 0b010, Some(ExtendOp::SXTW) => 0b110, Some(ExtendOp::SXTX) => 0b111, - None => 0b011, /* LSL */ + None => 0b011, // LSL _ => panic!("bad extend mode for ld/st MemArg"), }; (op_31_22 << 22) @@ -244,7 +228,7 @@ fn enc_br(rn: Reg) -> u32 { } fn enc_adr(off: i32, rd: Writable) -> u32 { - let off = off as u32; + let off = u32::try_from(off).unwrap(); let immlo = off & 3; let immhi = (off >> 2) & ((1 << 19) - 1); (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) @@ -258,8 +242,8 @@ fn enc_csel(rd: Writable, rn: Reg, rm: Reg, cond: Cond) -> u32 { | (cond.bits() << 12) } -fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, is32: bool) -> u32 { - let ty_bit = if is32 { 0 } else { 1 }; +fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, size: InstSize) -> u32 { + let ty_bit = if size.is32() { 0 } else { 1 }; 0b000_11110_00_1_00000_0000_11_00000_00000 | 
(machreg_to_vec(rm) << 16) | (machreg_to_vec(rn) << 5) @@ -301,8 +285,8 @@ fn enc_fpurrrr(top17: u32, rd: Writable, rn: Reg, rm: Reg, ra: Reg) -> u32 | machreg_to_vec(rd.to_reg()) } -fn enc_fcmp(is32: bool, rn: Reg, rm: Reg) -> u32 { - let bits = if is32 { +fn enc_fcmp(size: InstSize, rn: Reg, rm: Reg) -> u32 { + let bits = if size.is32() { 0b000_11110_00_1_00000_00_1000_00000_00000 } else { 0b000_11110_01_1_00000_00_1000_00000_00000 @@ -359,7 +343,7 @@ impl MachInstEmit for Inst { | ALUOp::SMulH | ALUOp::UMulH => { //// RRRR ops. - panic!("Bad ALUOp in RRR form!"); + panic!("Bad ALUOp {:?} in RRR form!", alu_op); } }; let bit15_10 = match alu_op { @@ -450,14 +434,14 @@ impl MachInstEmit for Inst { } => { let amt = immshift.value(); let (top10, immr, imms) = match alu_op { - ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), amt as u32), - ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), amt as u32), - ALUOp::Lsr32 => (0b0101001100, amt as u32, 0b011111), - ALUOp::Lsr64 => (0b1101001101, amt as u32, 0b111111), - ALUOp::Asr32 => (0b0001001100, amt as u32, 0b011111), - ALUOp::Asr64 => (0b1001001101, amt as u32, 0b111111), - ALUOp::Lsl32 => (0b0101001100, (32 - amt) as u32, (31 - amt) as u32), - ALUOp::Lsl64 => (0b1101001101, (64 - amt) as u32, (63 - amt) as u32), + ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)), + ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)), + ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111), + ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111), + ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111), + ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111), + ALUOp::Lsl32 => (0b0101001100, u32::from(32 - amt), u32::from(31 - amt)), + ALUOp::Lsl64 => (0b1101001101, u32::from(64 - amt), u32::from(63 - amt)), _ => unimplemented!("{:?}", alu_op), }; sink.put4( @@ -476,7 +460,7 @@ impl MachInstEmit for Inst { rm, ref shiftop, } => { - let top11: u16 = match alu_op { + let top11: u32 = match alu_op { ALUOp::Add32 => 0b000_01011000, ALUOp::Add64 => 0b100_01011000, ALUOp::AddS32 => 0b001_01011000, @@ -499,8 +483,8 @@ impl MachInstEmit for Inst { ALUOp::AndNot64 => 0b100_01010001, _ => unimplemented!("{:?}", alu_op), }; - let top11 = top11 | ((shiftop.op().bits() as u16) << 1); - let bits_15_10 = shiftop.amt().value(); + let top11 = top11 | (u32::from(shiftop.op().bits()) << 1); + let bits_15_10 = u32::from(shiftop.amt().value()); sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); } @@ -511,7 +495,7 @@ impl MachInstEmit for Inst { rm, extendop, } => { - let top11 = match alu_op { + let top11: u32 = match alu_op { ALUOp::Add32 => 0b00001011001, ALUOp::Add64 => 0b10001011001, ALUOp::Sub32 => 0b01001011001, @@ -522,12 +506,12 @@ impl MachInstEmit for Inst { ALUOp::SubS64 => 0b11101011001, _ => unimplemented!("{:?}", alu_op), }; - let bits_15_10 = extendop.bits() << 3; + let bits_15_10 = u32::from(extendop.bits()) << 3; sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); } &Inst::BitRR { op, rd, rn, .. 
} => { - let size = if op.is_32_bit() { 0b0 } else { 0b1 }; + let size = if op.inst_size().is32() { 0b0 } else { 0b1 }; let (op1, op2) = match op { BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000), BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100), @@ -655,6 +639,7 @@ impl MachInstEmit for Inst { } &MemArg::Label(ref label) => { let offset = match label { + // cast i32 to u32 (two's-complement) &MemLabel::PCRel(off) => off as u32, } / 4; assert!(offset < (1 << 19)); @@ -825,10 +810,16 @@ impl MachInstEmit for Inst { &Inst::Mov { rd, rm } => { assert!(rd.to_reg().get_class() == rm.get_class()); assert!(rm.get_class() == RegClass::I64); + // MOV to SP is interpreted as MOV to XZR instead. And our codegen + // should never MOV to XZR. + assert!(machreg_to_gpr(rd.to_reg()) != 31); // Encoded as ORR rd, rm, zero. sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm)); } &Inst::Mov32 { rd, rm } => { + // MOV to SP is interpreted as MOV to XZR instead. And our codegen + // should never MOV to XZR. + assert!(machreg_to_gpr(rd.to_reg()) != 31); // Encoded as ORR rd, rm, zero. sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); } @@ -888,10 +879,10 @@ impl MachInstEmit for Inst { sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); } &Inst::FpuCmp32 { rn, rm } => { - sink.put4(enc_fcmp(/* is32 = */ true, rn, rm)); + sink.put4(enc_fcmp(InstSize::Size32, rn, rm)); } &Inst::FpuCmp64 { rn, rm } => { - sink.put4(enc_fcmp(/* is32 = */ false, rn, rm)); + sink.put4(enc_fcmp(InstSize::Size64, rn, rm)); } &Inst::FpuToInt { op, rd, rn } => { let top16 = match op { @@ -962,10 +953,10 @@ impl MachInstEmit for Inst { sink.put8(const_data.to_bits()); } &Inst::FpuCSel32 { rd, rn, rm, cond } => { - sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ true)); + sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32)); } &Inst::FpuCSel64 { rd, rn, rm, cond } => { - sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ false)); + sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size64)); } &Inst::FpuRound { op, rd, rn } => { let top22 = match op { @@ -1093,10 +1084,10 @@ impl MachInstEmit for Inst { // do early (fake) emission for size computation. sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap())); } - &Inst::Ret {} => { + &Inst::Ret => { sink.put4(0xd65f03c0); } - &Inst::EpiloguePlaceholder {} => { + &Inst::EpiloguePlaceholder => { // Noop; this is just a placeholder for epilogues. } &Inst::Call { @@ -1168,7 +1159,7 @@ impl MachInstEmit for Inst { &Inst::IndirectBr { rn, .. } => { sink.put4(enc_br(rn)); } - &Inst::Nop => {} + &Inst::Nop0 => {} &Inst::Nop4 => { sink.put4(0xd503201f); } @@ -1204,7 +1195,7 @@ impl MachInstEmit for Inst { // the middle; we depend on hardcoded PC-rel addressing below. // // N.B.: if PC-rel addressing on ADR below is changed, also update - // `Inst::with_block_offsets()` in arm64/inst/mod.rs. + // `Inst::with_block_offsets()` in aarch64/inst/mod.rs. // Save index in a tmp (the live range of ridx only goes to start of this // sequence; rtmp1 or rtmp2 may overwrite it). @@ -1219,7 +1210,7 @@ impl MachInstEmit for Inst { // Load value out of jump table let inst = Inst::SLoad32 { rd: rtmp2, - mem: MemArg::reg_reg_scaled_extended( + mem: MemArg::reg_plus_reg_scaled_extended( rtmp1.to_reg(), rtmp2.to_reg(), I32, @@ -1246,7 +1237,9 @@ impl MachInstEmit for Inst { // Emit jump table (table of 32-bit offsets). 
for target in targets { let off = target.as_offset_words() * 4; - let off = off as i32 as u32; + let off = i32::try_from(off).unwrap(); + // cast i32 to u32 (two's-complement) + let off = off as u32; sink.put4(off); } } @@ -1292,7 +1285,7 @@ mod test { use crate::isa::test_utils; #[test] - fn test_arm64_binemit() { + fn test_aarch64_binemit() { let mut insns = Vec::<(Inst, &str, &str)>::new(); // N.B.: the architecture is little-endian, so when transcribing the 32-bit @@ -1310,10 +1303,10 @@ mod test { // // Then: // - // $ echo "mov x1, x2" | arm64inst.sh - insns.push((Inst::Ret {}, "C0035FD6", "ret")); - insns.push((Inst::Nop {}, "", "nop-zero-len")); - insns.push((Inst::Nop4 {}, "1F2003D5", "nop")); + // $ echo "mov x1, x2" | aarch64inst.sh + insns.push((Inst::Ret, "C0035FD6", "ret")); + insns.push((Inst::Nop0, "", "nop-zero-len")); + insns.push((Inst::Nop4, "1F2003D5", "nop")); insns.push(( Inst::AluRRR { alu_op: ALUOp::Add32, @@ -4052,7 +4045,7 @@ mod test { let rru = create_reg_universe(); for (insn, expected_encoding, expected_printing) in insns { println!( - "ARM64: {:?}, {}, {}", + "AArch64: {:?}, {}, {}", insn, expected_encoding, expected_printing ); diff --git a/cranelift/codegen/src/isa/arm64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs similarity index 91% rename from cranelift/codegen/src/isa/arm64/inst/imms.rs rename to cranelift/codegen/src/isa/aarch64/inst/imms.rs index eda68af7b1..7230b4f44e 100644 --- a/cranelift/codegen/src/isa/arm64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -1,8 +1,7 @@ -//! ARM64 ISA definitions: immediate constants. - -#![allow(dead_code)] -#![allow(non_snake_case)] +//! AArch64 ISA definitions: immediate constants. +// Some variants are never constructed, but we still want them as options in the future. +#[allow(dead_code)] use crate::ir::types::*; use crate::ir::Type; use crate::machinst::*; @@ -28,12 +27,12 @@ impl SImm7Scaled { assert!(scale_ty == I64 || scale_ty == I32); let scale = scale_ty.bytes(); assert!(scale.is_power_of_two()); - let scale = scale as i64; + let scale = i64::from(scale); let upper_limit = 63 * scale; let lower_limit = -(64 * scale); if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 { Some(SImm7Scaled { - value: value as i16, + value: i16::try_from(value).unwrap(), scale_ty, }) } else { @@ -48,7 +47,12 @@ impl SImm7Scaled { /// Bits for encoding. pub fn bits(&self) -> u32 { - ((self.value / self.scale_ty.bytes() as i16) as u32) & 0x7f + let ty_bytes: i16 = self.scale_ty.bytes() as i16; + let scaled: i16 = self.value / ty_bytes; + assert!(scaled <= 63 && scaled >= -64); + let scaled: i8 = scaled as i8; + let encoded: u32 = scaled as u32; + encoded & 0x7f } } @@ -125,7 +129,7 @@ impl UImm12Scaled { #[derive(Clone, Debug)] pub struct Imm12 { /// The immediate bits. - pub bits: usize, + pub bits: u16, /// Whether the immediate bits are shifted left by 12 or not. pub shift12: bool, } @@ -140,12 +144,12 @@ impl Imm12 { }) } else if val < 0xfff { Some(Imm12 { - bits: val as usize, + bits: val as u16, shift12: false, }) } else if val < 0xfff_000 && (val & 0xfff == 0) { Some(Imm12 { - bits: (val as usize) >> 12, + bits: (val >> 12) as u16, shift12: true, }) } else { @@ -154,7 +158,7 @@ impl Imm12 { } /// Bits for 2-bit "shift" field in e.g. AddI. - pub fn shift_bits(&self) -> u8 { + pub fn shift_bits(&self) -> u32 { if self.shift12 { 0b01 } else { @@ -163,8 +167,8 @@ impl Imm12 { } /// Bits for 12-bit "imm" field in e.g. AddI. 
- pub fn imm_bits(&self) -> u16 { - self.bits as u16 + pub fn imm_bits(&self) -> u32 { + self.bits as u32 } } @@ -175,11 +179,11 @@ pub struct ImmLogic { /// The actual value. value: u64, /// `N` flag. - pub N: bool, + pub n: bool, /// `S` field: element size and element bits. - pub R: u8, + pub r: u8, /// `R` field: rotate amount. - pub S: u8, + pub s: u8, } impl ImmLogic { @@ -367,24 +371,19 @@ impl ImmLogic { debug_assert!(u8::try_from(s).is_ok()); Some(ImmLogic { value: original_value, - N: out_n != 0, - R: r as u8, - S: s as u8, + n: out_n != 0, + r: r as u8, + s: s as u8, }) } pub fn from_raw(value: u64, n: bool, r: u8, s: u8) -> ImmLogic { - ImmLogic { - N: n, - R: r, - S: s, - value, - } + ImmLogic { n, r, s, value } } /// Returns bits ready for encoding: (N:1, R:6, S:6) - pub fn enc_bits(&self) -> u16 { - ((self.N as u16) << 12) | ((self.R as u16) << 6) | (self.S as u16) + pub fn enc_bits(&self) -> u32 { + ((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32) } /// Returns the value that this immediate represents. @@ -427,7 +426,7 @@ impl ImmShift { pub struct MoveWideConst { /// The value. pub bits: u16, - /// shifted 16*shift bits to the left. + /// Result is `bits` shifted 16*shift bits to the left. pub shift: u8, } @@ -487,7 +486,7 @@ impl MoveWideConst { impl ShowWithRRU for Imm12 { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { let shift = if self.shift12 { 12 } else { 0 }; - let value = self.bits << shift; + let value = u32::from(self.bits) << shift; format!("#{}", value) } } @@ -544,9 +543,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 1, - N: true, - R: 0, - S: 0 + n: true, + r: 0, + s: 0 }), ImmLogic::maybe_from_u64(1, I64) ); @@ -554,9 +553,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 2, - N: true, - R: 63, - S: 0 + n: true, + r: 63, + s: 0 }), ImmLogic::maybe_from_u64(2, I64) ); @@ -568,9 +567,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 248, - N: true, - R: 61, - S: 4 + n: true, + r: 61, + s: 4 }), ImmLogic::maybe_from_u64(248, I64) ); @@ -580,9 +579,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 1920, - N: true, - R: 57, - S: 3 + n: true, + r: 57, + s: 3 }), ImmLogic::maybe_from_u64(1920, I64) ); @@ -590,9 +589,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 0x7ffe, - N: true, - R: 63, - S: 13 + n: true, + r: 63, + s: 13 }), ImmLogic::maybe_from_u64(0x7ffe, I64) ); @@ -600,9 +599,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 0x30000, - N: true, - R: 48, - S: 1 + n: true, + r: 48, + s: 1 }), ImmLogic::maybe_from_u64(0x30000, I64) ); @@ -610,9 +609,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 0x100000, - N: true, - R: 44, - S: 0 + n: true, + r: 44, + s: 0 }), ImmLogic::maybe_from_u64(0x100000, I64) ); @@ -620,9 +619,9 @@ mod test { assert_eq!( Some(ImmLogic { value: u64::max_value() - 1, - N: true, - R: 63, - S: 62 + n: true, + r: 63, + s: 62 }), ImmLogic::maybe_from_u64(u64::max_value() - 1, I64) ); @@ -630,9 +629,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 0xaaaaaaaaaaaaaaaa, - N: false, - R: 1, - S: 60 + n: false, + r: 1, + s: 60 }), ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64) ); @@ -640,9 +639,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 0x8181818181818181, - N: false, - R: 1, - S: 49 + n: false, + r: 1, + s: 49 }), ImmLogic::maybe_from_u64(0x8181818181818181, I64) ); @@ -650,9 +649,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 0xffc3ffc3ffc3ffc3, - N: false, - R: 10, - S: 43 + n: false, + r: 10, + s: 43 }), ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64) ); @@ -660,9 +659,9 
@@ mod test { assert_eq!( Some(ImmLogic { value: 0x100000001, - N: false, - R: 0, - S: 0 + n: false, + r: 0, + s: 0 }), ImmLogic::maybe_from_u64(0x100000001, I64) ); @@ -670,9 +669,9 @@ mod test { assert_eq!( Some(ImmLogic { value: 0x1111111111111111, - N: false, - R: 0, - S: 56 + n: false, + r: 0, + s: 56 }), ImmLogic::maybe_from_u64(0x1111111111111111, I64) ); diff --git a/cranelift/codegen/src/isa/arm64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs similarity index 66% rename from cranelift/codegen/src/isa/arm64/inst/mod.rs rename to cranelift/codegen/src/isa/aarch64/inst/mod.rs index ecc948cc70..44da584b44 100644 --- a/cranelift/codegen/src/isa/arm64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1,28 +1,19 @@ -//! This module defines arm64-specific machine instruction types. +//! This module defines aarch64-specific machine instruction types. -#![allow(non_snake_case)] -#![allow(unused_imports)] -#![allow(non_camel_case_types)] +// Some variants are not constructed, but we still want them as options in the future. #![allow(dead_code)] use crate::binemit::CodeOffset; -use crate::ir::constant::{ConstantData, ConstantOffset}; -use crate::ir::types::{ - B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, -}; -use crate::ir::{ExternalName, GlobalValue, JumpTable, Opcode, SourceLoc, TrapCode, Type}; +use crate::ir::types::{B1, B16, B32, B64, B8, F32, F64, FFLAGS, I16, I32, I64, I8, IFLAGS}; +use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; use regalloc::Map as RegallocMap; -use regalloc::{ - RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, - NUM_REG_CLASSES, -}; +use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; use regalloc::{RegUsageCollector, Set}; use alloc::vec::Vec; use smallvec::{smallvec, SmallVec}; -use std::mem; use std::string::{String, ToString}; pub mod regs; @@ -47,25 +38,43 @@ pub enum ALUOp { Sub64, Orr32, Orr64, + /// NOR OrrNot32, + /// NOR OrrNot64, And32, And64, + /// NAND AndNot32, + /// NAND AndNot64, + /// XOR (AArch64 calls this "EOR") Eor32, + /// XOR (AArch64 calls this "EOR") Eor64, + /// XNOR (AArch64 calls this "EOR-NOT") EorNot32, + /// XNOR (AArch64 calls this "EOR-NOT") EorNot64, + /// Add, setting flags AddS32, + /// Add, setting flags AddS64, + /// Sub, setting flags SubS32, + /// Sub, setting flags SubS64, - MAdd32, // multiply-add + /// Multiply-add + MAdd32, + /// Multiply-add MAdd64, + /// Multiply-sub MSub32, + /// Multiply-sub MSub64, + /// Signed multiply, high-word result SMulH, + /// Unsigned multiply, high-word result UMulH, SDiv64, UDiv64, @@ -159,17 +168,23 @@ pub enum FpuRoundMode { /// A vector ALU operation. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum VecALUOp { - SQAddScalar, // signed saturating add - UQAddScalar, // unsigned saturating add - SQSubScalar, // signed saturating subtract - UQSubScalar, // unsigned saturating subtract + /// Signed saturating add + SQAddScalar, + /// Unsigned saturating add + UQAddScalar, + /// Signed saturating subtract + SQSubScalar, + /// Unsigned saturating subtract + UQSubScalar, } /// An operation on the bits of a register. This can be paired with several instruction formats /// below (see `Inst`) in any combination. 
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum BitOp { + /// Bit reverse RBit32, + /// Bit reverse RBit64, Clz32, Clz64, @@ -178,13 +193,11 @@ pub enum BitOp { } impl BitOp { - /// Is the opcode a 32-bit operation. - pub fn is_32_bit(&self) -> bool { + /// What is the opcode's native width? + pub fn inst_size(&self) -> InstSize { match self { - BitOp::RBit32 => true, - BitOp::Clz32 => true, - BitOp::Cls32 => true, - _ => false, + BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => InstSize::Size32, + _ => InstSize::Size64, } } @@ -217,7 +230,7 @@ impl From<(Opcode, Type)> for BitOp { #[derive(Clone, Debug)] pub enum Inst { /// A no-op of zero size. - Nop, + Nop0, /// A no-op that is one instruction large. Nop4, @@ -465,32 +478,37 @@ pub enum Inst { rm: Reg, }, - /// Floating-point loads and stores. + /// Floating-point load, single-precision (32 bit). FpuLoad32 { rd: Writable, mem: MemArg, srcloc: Option, }, + /// Floating-point store, single-precision (32 bit). FpuStore32 { rd: Reg, mem: MemArg, srcloc: Option, }, + /// Floating-point load, double-precision (64 bit). FpuLoad64 { rd: Writable, mem: MemArg, srcloc: Option, }, + /// Floating-point store, double-precision (64 bit). FpuStore64 { rd: Reg, mem: MemArg, srcloc: Option, }, + /// Floating-point/vector load, 128 bit. FpuLoad128 { rd: Writable, mem: MemArg, srcloc: Option, }, + /// Floating-point/vector store, 128 bit. FpuStore128 { rd: Reg, mem: MemArg, @@ -507,26 +525,28 @@ pub enum Inst { const_data: f64, }, - /// Conversions between FP and integer values. + /// Conversion: FP -> integer. FpuToInt { op: FpuToIntOp, rd: Writable, rn: Reg, }, + /// Conversion: integer -> FP. IntToFpu { op: IntToFpuOp, rd: Writable, rn: Reg, }, - // FP conditional select. + /// FP conditional select, 32 bit. FpuCSel32 { rd: Writable, rn: Reg, rm: Reg, cond: Cond, }, + /// FP conditional select, 64 bit. FpuCSel64 { rd: Writable, rn: Reg, @@ -534,7 +554,7 @@ pub enum Inst { cond: Cond, }, - // Round to integer. + /// Round to integer. FpuRound { op: FpuRoundMode, rd: Writable, @@ -596,11 +616,11 @@ pub enum Inst { // ---- branches (exactly one must appear at end of BB) ---- /// A machine return instruction. - Ret {}, + Ret, /// A placeholder instruction, generating no code, meaning that a function epilogue must be /// inserted there. - EpiloguePlaceholder {}, + EpiloguePlaceholder, /// An unconditional branch. Jump { @@ -689,7 +709,7 @@ pub enum Inst { }, } -fn count_clear_half_words(mut value: u64) -> usize { +fn count_zero_half_words(mut value: u64) -> usize { let mut count = 0; for _ in 0..4 { if value & 0xffff == 0 { @@ -748,7 +768,7 @@ impl Inst { // If the number of 0xffff half words is greater than the number of 0x0000 half words // it is more efficient to use `movn` for the first instruction. - let first_is_inverted = count_clear_half_words(!value) > count_clear_half_words(value); + let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value); // Either 0xffff or 0x0000 half words can be skipped, depending on the first // instruction used. let ignored_halfword = if first_is_inverted { 0xffff } else { 0 }; @@ -839,7 +859,7 @@ fn pairmemarg_regs(pairmemarg: &PairMemArg, collector: &mut RegUsageCollector) { } } -fn arm64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { +fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { match inst { &Inst::AluRRR { rd, rn, rm, .. 
} => { collector.add_def(rd); @@ -1024,7 +1044,7 @@ fn arm64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(rd); collector.add_use(rn); } - &Inst::Jump { .. } | &Inst::Ret { .. } | &Inst::EpiloguePlaceholder { .. } => {} + &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {} &Inst::Call { ref uses, ref defs, .. } => { @@ -1052,7 +1072,7 @@ fn arm64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::IndirectBr { rn, .. } => { collector.add_use(rn); } - &Inst::Nop | Inst::Nop4 => {} + &Inst::Nop0 | Inst::Nop4 => {} &Inst::Brk => {} &Inst::Udf { .. } => {} &Inst::Adr { rd, .. } => { @@ -1075,548 +1095,555 @@ fn arm64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { //============================================================================= // Instructions: map_regs -fn arm64_map_regs( +fn aarch64_map_regs( inst: &mut Inst, pre_map: &RegallocMap, post_map: &RegallocMap, ) { - fn map(m: &RegallocMap, r: Reg) -> Reg { + fn map(m: &RegallocMap, r: &mut Reg) { if r.is_virtual() { - m.get(&r.to_virtual_reg()).cloned().unwrap().to_reg() - } else { - r + let new = m.get(&r.to_virtual_reg()).cloned().unwrap().to_reg(); + *r = new; } } - fn map_wr(m: &RegallocMap, r: Writable) -> Writable { - Writable::from_reg(map(m, r.to_reg())) + fn map_wr(m: &RegallocMap, r: &mut Writable) { + let mut reg = r.to_reg(); + map(m, &mut reg); + *r = Writable::from_reg(reg); } - fn map_mem(u: &RegallocMap, mem: &MemArg) -> MemArg { + fn map_mem(u: &RegallocMap, mem: &mut MemArg) { // N.B.: we take only the pre-map here, but this is OK because the // only addressing modes that update registers (pre/post-increment on - // ARM64) both read and write registers, so they are "mods" rather + // AArch64) both read and write registers, so they are "mods" rather // than "defs", so must be the same in both the pre- and post-map. match mem { - &MemArg::Unscaled(reg, simm9) => MemArg::Unscaled(map(u, reg), simm9), - &MemArg::UnsignedOffset(reg, uimm12) => MemArg::UnsignedOffset(map(u, reg), uimm12), - &MemArg::RegReg(r1, r2) => MemArg::RegReg(map(u, r1), map(u, r2)), - &MemArg::RegScaled(r1, r2, ty) => MemArg::RegScaled(map(u, r1), map(u, r2), ty), - &MemArg::RegScaledExtended(r1, r2, ty, op) => { - MemArg::RegScaledExtended(map(u, r1), map(u, r2), ty, op) + &mut MemArg::Unscaled(ref mut reg, ..) => map(u, reg), + &mut MemArg::UnsignedOffset(ref mut reg, ..) => map(u, reg), + &mut MemArg::RegReg(ref mut r1, ref mut r2) => { + map(u, r1); + map(u, r2); } - &MemArg::Label(ref l) => MemArg::Label(l.clone()), - &MemArg::PreIndexed(r, simm9) => MemArg::PreIndexed(map_wr(u, r), simm9), - &MemArg::PostIndexed(r, simm9) => MemArg::PostIndexed(map_wr(u, r), simm9), - &MemArg::FPOffset(off) => MemArg::FPOffset(off), - &MemArg::SPOffset(off) => MemArg::SPOffset(off), - } + &mut MemArg::RegScaled(ref mut r1, ref mut r2, ..) => { + map(u, r1); + map(u, r2); + } + &mut MemArg::RegScaledExtended(ref mut r1, ref mut r2, ..) => { + map(u, r1); + map(u, r2); + } + &mut MemArg::Label(..) => {} + &mut MemArg::PreIndexed(ref mut r, ..) => map_wr(u, r), + &mut MemArg::PostIndexed(ref mut r, ..) => map_wr(u, r), + &mut MemArg::FPOffset(..) | &mut MemArg::SPOffset(..) 
=> {} + }; } - fn map_pairmem(u: &RegallocMap, mem: &PairMemArg) -> PairMemArg { + fn map_pairmem(u: &RegallocMap, mem: &mut PairMemArg) { match mem { - &PairMemArg::SignedOffset(reg, simm7) => PairMemArg::SignedOffset(map(u, reg), simm7), - &PairMemArg::PreIndexed(reg, simm7) => PairMemArg::PreIndexed(map_wr(u, reg), simm7), - &PairMemArg::PostIndexed(reg, simm7) => PairMemArg::PostIndexed(map_wr(u, reg), simm7), + &mut PairMemArg::SignedOffset(ref mut reg, ..) => map(u, reg), + &mut PairMemArg::PreIndexed(ref mut reg, ..) => map_wr(u, reg), + &mut PairMemArg::PostIndexed(ref mut reg, ..) => map_wr(u, reg), } } - fn map_br(u: &RegallocMap, br: &CondBrKind) -> CondBrKind { + fn map_br(u: &RegallocMap, br: &mut CondBrKind) { match br { - &CondBrKind::Zero(reg) => CondBrKind::Zero(map(u, reg)), - &CondBrKind::NotZero(reg) => CondBrKind::NotZero(map(u, reg)), - &CondBrKind::Cond(c) => CondBrKind::Cond(c), - } + &mut CondBrKind::Zero(ref mut reg) => map(u, reg), + &mut CondBrKind::NotZero(ref mut reg) => map(u, reg), + &mut CondBrKind::Cond(..) => {} + }; } let u = pre_map; // For brevity below. let d = post_map; - let newval = match inst { - &mut Inst::AluRRR { alu_op, rd, rn, rm } => Inst::AluRRR { - alu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - }, + match inst { + &mut Inst::AluRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } &mut Inst::AluRRRR { - alu_op, - rd, - rn, - rm, - ra, - } => Inst::AluRRRR { - alu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - ra: map(u, ra), - }, + ref mut rd, + ref mut rn, + ref mut rm, + ref mut ra, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + map(u, ra); + } &mut Inst::AluRRImm12 { - alu_op, - rd, - rn, - ref imm12, - } => Inst::AluRRImm12 { - alu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - imm12: imm12.clone(), - }, + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } &mut Inst::AluRRImmLogic { - alu_op, - rd, - rn, - ref imml, - } => Inst::AluRRImmLogic { - alu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - imml: imml.clone(), - }, + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } &mut Inst::AluRRImmShift { - alu_op, - rd, - rn, - ref immshift, - } => Inst::AluRRImmShift { - alu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - immshift: immshift.clone(), - }, + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } &mut Inst::AluRRRShift { - alu_op, - rd, - rn, - rm, - ref shiftop, - } => Inst::AluRRRShift { - alu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - shiftop: shiftop.clone(), - }, + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } &mut Inst::AluRRRExtend { - alu_op, - rd, - rn, - rm, - ref extendop, - } => Inst::AluRRRExtend { - alu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - extendop: extendop.clone(), - }, - &mut Inst::BitRR { op, rd, rn } => Inst::BitRR { - op, - rd: map_wr(d, rd), - rn: map(u, rn), - }, + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } + &mut Inst::BitRR { + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } &mut Inst::ULoad8 { - rd, - ref mem, - srcloc, - } => Inst::ULoad8 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. 
+ } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::SLoad8 { - rd, - ref mem, - srcloc, - } => Inst::SLoad8 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::ULoad16 { - rd, - ref mem, - srcloc, - } => Inst::ULoad16 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::SLoad16 { - rd, - ref mem, - srcloc, - } => Inst::SLoad16 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::ULoad32 { - rd, - ref mem, - srcloc, - } => Inst::ULoad32 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::SLoad32 { - rd, - ref mem, - srcloc, - } => Inst::SLoad32 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } + &mut Inst::ULoad64 { - rd, - ref mem, - srcloc, - } => Inst::ULoad64 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::Store8 { - rd, - ref mem, - srcloc, - } => Inst::Store8 { - rd: map(u, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map(u, rd); + map_mem(u, mem); + } &mut Inst::Store16 { - rd, - ref mem, - srcloc, - } => Inst::Store16 { - rd: map(u, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map(u, rd); + map_mem(u, mem); + } &mut Inst::Store32 { - rd, - ref mem, - srcloc, - } => Inst::Store32 { - rd: map(u, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map(u, rd); + map_mem(u, mem); + } &mut Inst::Store64 { - rd, - ref mem, - srcloc, - } => Inst::Store64 { - rd: map(u, rd), - mem: map_mem(u, mem), - srcloc, - }, - &mut Inst::StoreP64 { rt, rt2, ref mem } => Inst::StoreP64 { - rt: map(u, rt), - rt2: map(u, rt2), - mem: map_pairmem(u, mem), - }, - &mut Inst::LoadP64 { rt, rt2, ref mem } => Inst::LoadP64 { - rt: map_wr(d, rt), - rt2: map_wr(d, rt2), - mem: map_pairmem(u, mem), - }, - &mut Inst::Mov { rd, rm } => Inst::Mov { - rd: map_wr(d, rd), - rm: map(u, rm), - }, - &mut Inst::Mov32 { rd, rm } => Inst::Mov32 { - rd: map_wr(d, rd), - rm: map(u, rm), - }, - &mut Inst::MovZ { rd, ref imm } => Inst::MovZ { - rd: map_wr(d, rd), - imm: imm.clone(), - }, - &mut Inst::MovN { rd, ref imm } => Inst::MovN { - rd: map_wr(d, rd), - imm: imm.clone(), - }, - &mut Inst::MovK { rd, ref imm } => Inst::MovK { - rd: map_wr(d, rd), - imm: imm.clone(), - }, - &mut Inst::CSel { rd, rn, rm, cond } => Inst::CSel { - cond, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - }, - &mut Inst::CSet { rd, cond } => Inst::CSet { - cond, - rd: map_wr(d, rd), - }, - &mut Inst::FpuMove64 { rd, rn } => Inst::FpuMove64 { - rd: map_wr(d, rd), - rn: map(u, rn), - }, - &mut Inst::FpuRR { fpu_op, rd, rn } => Inst::FpuRR { - fpu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - }, - &mut Inst::FpuRRR { fpu_op, rd, rn, rm } => Inst::FpuRRR { - fpu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - }, + ref mut rd, + ref mut mem, + .. 
+ } => { + map(u, rd); + map_mem(u, mem); + } + + &mut Inst::StoreP64 { + ref mut rt, + ref mut rt2, + ref mut mem, + } => { + map(u, rt); + map(u, rt2); + map_pairmem(u, mem); + } + &mut Inst::LoadP64 { + ref mut rt, + ref mut rt2, + ref mut mem, + } => { + map_wr(d, rt); + map_wr(d, rt2); + map_pairmem(u, mem); + } + &mut Inst::Mov { + ref mut rd, + ref mut rm, + } => { + map_wr(d, rd); + map(u, rm); + } + &mut Inst::Mov32 { + ref mut rd, + ref mut rm, + } => { + map_wr(d, rd); + map(u, rm); + } + &mut Inst::MovZ { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::MovN { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::MovK { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::CSel { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } + &mut Inst::CSet { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::FpuMove64 { + ref mut rd, + ref mut rn, + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::FpuRR { + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::FpuRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } &mut Inst::FpuRRRR { - fpu_op, - rd, - rn, - rm, - ra, - } => Inst::FpuRRRR { - fpu_op, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - ra: map(u, ra), - }, - &mut Inst::FpuCmp32 { rn, rm } => Inst::FpuCmp32 { - rn: map(u, rn), - rm: map(u, rm), - }, - &mut Inst::FpuCmp64 { rn, rm } => Inst::FpuCmp64 { - rn: map(u, rn), - rm: map(u, rm), - }, + ref mut rd, + ref mut rn, + ref mut rm, + ref mut ra, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + map(u, ra); + } + &mut Inst::FpuCmp32 { + ref mut rn, + ref mut rm, + } => { + map(u, rn); + map(u, rm); + } + &mut Inst::FpuCmp64 { + ref mut rn, + ref mut rm, + } => { + map(u, rn); + map(u, rm); + } &mut Inst::FpuLoad32 { - rd, - ref mem, - srcloc, - } => Inst::FpuLoad32 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::FpuLoad64 { - rd, - ref mem, - srcloc, - } => Inst::FpuLoad64 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::FpuLoad128 { - rd, - ref mem, - srcloc, - } => Inst::FpuLoad64 { - rd: map_wr(d, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map_wr(d, rd); + map_mem(u, mem); + } &mut Inst::FpuStore32 { - rd, - ref mem, - srcloc, - } => Inst::FpuStore32 { - rd: map(u, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. + } => { + map(u, rd); + map_mem(u, mem); + } &mut Inst::FpuStore64 { - rd, - ref mem, - srcloc, - } => Inst::FpuStore64 { - rd: map(u, rd), - mem: map_mem(u, mem), - srcloc, - }, + ref mut rd, + ref mut mem, + .. 
+ } => { + map(u, rd); + map_mem(u, mem); + } &mut Inst::FpuStore128 { - rd, - ref mem, - srcloc, - } => Inst::FpuStore64 { - rd: map(u, rd), - mem: map_mem(u, mem), - srcloc, - }, - &mut Inst::LoadFpuConst32 { rd, const_data } => Inst::LoadFpuConst32 { - rd: map_wr(d, rd), - const_data, - }, - &mut Inst::LoadFpuConst64 { rd, const_data } => Inst::LoadFpuConst64 { - rd: map_wr(d, rd), - const_data, - }, - &mut Inst::FpuToInt { op, rd, rn } => Inst::FpuToInt { - op, - rd: map_wr(d, rd), - rn: map(u, rn), - }, - &mut Inst::IntToFpu { op, rd, rn } => Inst::IntToFpu { - op, - rd: map_wr(d, rd), - rn: map(u, rn), - }, - &mut Inst::FpuCSel32 { rd, rn, rm, cond } => Inst::FpuCSel32 { - cond, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - }, - &mut Inst::FpuCSel64 { rd, rn, rm, cond } => Inst::FpuCSel64 { - cond, - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - }, - &mut Inst::FpuRound { op, rd, rn } => Inst::FpuRound { - op, - rd: map_wr(d, rd), - rn: map(u, rn), - }, - &mut Inst::MovToVec64 { rd, rn } => Inst::MovToVec64 { - rd: map_wr(d, rd), - rn: map(u, rn), - }, - &mut Inst::MovFromVec64 { rd, rn } => Inst::MovFromVec64 { - rd: map_wr(d, rd), - rn: map(u, rn), - }, - &mut Inst::VecRRR { rd, rn, rm, alu_op } => Inst::VecRRR { - rd: map_wr(d, rd), - rn: map(u, rn), - rm: map(u, rm), - alu_op, - }, - &mut Inst::MovToNZCV { rn } => Inst::MovToNZCV { rn: map(u, rn) }, - &mut Inst::MovFromNZCV { rd } => Inst::MovFromNZCV { rd: map_wr(d, rd) }, - &mut Inst::CondSet { rd, cond } => Inst::CondSet { - rd: map_wr(d, rd), - cond, - }, + ref mut rd, + ref mut mem, + .. + } => { + map(u, rd); + map_mem(u, mem); + } + &mut Inst::LoadFpuConst32 { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::FpuToInt { + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::IntToFpu { + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::FpuCSel32 { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } + &mut Inst::FpuCSel64 { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } + &mut Inst::FpuRound { + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::MovToVec64 { + ref mut rd, + ref mut rn, + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::MovFromVec64 { + ref mut rd, + ref mut rn, + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::VecRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_wr(d, rd); + map(u, rn); + map(u, rm); + } + &mut Inst::MovToNZCV { ref mut rn } => { + map(u, rn); + } + &mut Inst::MovFromNZCV { ref mut rd } => { + map_wr(d, rd); + } + &mut Inst::CondSet { ref mut rd, .. } => { + map_wr(d, rd); + } &mut Inst::Extend { - rd, - rn, - signed, - from_bits, - to_bits, - } => Inst::Extend { - rd: map_wr(d, rd), - rn: map(u, rn), - signed, - from_bits, - to_bits, - }, - &mut Inst::Jump { dest } => Inst::Jump { dest }, + ref mut rd, + ref mut rn, + .. + } => { + map_wr(d, rd); + map(u, rn); + } + &mut Inst::Jump { .. } => {} &mut Inst::Call { - ref uses, - ref defs, - ref dest, - loc, - opcode, + ref mut uses, + ref mut defs, + .. } => { - let uses = uses.map(|r| map(u, *r)); - let defs = defs.map(|r| map_wr(d, *r)); - let dest = dest.clone(); - Inst::Call { - dest, - uses, - defs, - loc, - opcode, - } + // TODO: add `map_mut()` to regalloc.rs's Set. 
+ let new_uses = uses.map(|r| { + let mut r = *r; + map(u, &mut r); + r + }); + let new_defs = defs.map(|r| { + let mut r = *r; + map_wr(d, &mut r); + r + }); + *uses = new_uses; + *defs = new_defs; } - &mut Inst::Ret {} => Inst::Ret {}, - &mut Inst::EpiloguePlaceholder {} => Inst::EpiloguePlaceholder {}, + &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {} &mut Inst::CallInd { - ref uses, - ref defs, - rn, - loc, - opcode, + ref mut uses, + ref mut defs, + ref mut rn, + .. } => { - let uses = uses.map(|r| map(u, *r)); - let defs = defs.map(|r| map_wr(d, *r)); - Inst::CallInd { - uses, - defs, - rn: map(u, rn), - loc, - opcode, - } + // TODO: add `map_mut()` to regalloc.rs's Set. + let new_uses = uses.map(|r| { + let mut r = *r; + map(u, &mut r); + r + }); + let new_defs = defs.map(|r| { + let mut r = *r; + map_wr(d, &mut r); + r + }); + *uses = new_uses; + *defs = new_defs; + map(u, rn); } - &mut Inst::CondBr { - taken, - not_taken, - kind, - } => Inst::CondBr { - taken, - not_taken, - kind: map_br(u, &kind), - }, - &mut Inst::CondBrLowered { target, kind } => Inst::CondBrLowered { - target, - kind: map_br(u, &kind), - }, - &mut Inst::CondBrLoweredCompound { - taken, - not_taken, - kind, - } => Inst::CondBrLoweredCompound { - taken, - not_taken, - kind: map_br(u, &kind), - }, - &mut Inst::IndirectBr { rn, ref targets } => Inst::IndirectBr { - rn: map(u, rn), - targets: targets.clone(), - }, - &mut Inst::Nop => Inst::Nop, - &mut Inst::Nop4 => Inst::Nop4, - &mut Inst::Brk => Inst::Brk, - &mut Inst::Udf { trap_info } => Inst::Udf { trap_info }, - &mut Inst::Adr { rd, ref label } => Inst::Adr { - rd: map_wr(d, rd), - label: label.clone(), - }, - &mut Inst::Word4 { data } => Inst::Word4 { data }, - &mut Inst::Word8 { data } => Inst::Word8 { data }, + &mut Inst::CondBr { ref mut kind, .. } => { + map_br(u, kind); + } + &mut Inst::CondBrLowered { ref mut kind, .. } => { + map_br(u, kind); + } + &mut Inst::CondBrLoweredCompound { ref mut kind, .. } => { + map_br(u, kind); + } + &mut Inst::IndirectBr { ref mut rn, .. } => { + map(u, rn); + } + &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {} + &mut Inst::Adr { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {} &mut Inst::JTSequence { - ridx, - rtmp1, - rtmp2, - ref targets, - ref targets_for_term, - } => Inst::JTSequence { - targets: targets.clone(), - targets_for_term: targets_for_term.clone(), - ridx: map(u, ridx), - rtmp1: map_wr(d, rtmp1), - rtmp2: map_wr(d, rtmp2), - }, - &mut Inst::LoadConst64 { rd, const_data } => Inst::LoadConst64 { - rd: map_wr(d, rd), - const_data, - }, - &mut Inst::LoadExtName { - rd, - ref name, - offset, - srcloc, - } => Inst::LoadExtName { - rd: map_wr(d, rd), - name: name.clone(), - offset, - srcloc, - }, - }; - *inst = newval; + ref mut ridx, + ref mut rtmp1, + ref mut rtmp2, + .. + } => { + map(u, ridx); + map_wr(d, rtmp1); + map_wr(d, rtmp2); + } + &mut Inst::LoadConst64 { ref mut rd, .. } => { + map_wr(d, rd); + } + &mut Inst::LoadExtName { ref mut rd, .. 
} => { + map_wr(d, rd); + } + } } //============================================================================= @@ -1624,7 +1651,7 @@ fn arm64_map_regs( impl MachInst for Inst { fn get_regs(&self, collector: &mut RegUsageCollector) { - arm64_get_regs(self, collector) + aarch64_get_regs(self, collector) } fn map_regs( @@ -1632,7 +1659,7 @@ impl MachInst for Inst { pre_map: &RegallocMap, post_map: &RegallocMap, ) { - arm64_map_regs(self, pre_map, post_map); + aarch64_map_regs(self, pre_map, post_map); } fn is_move(&self) -> Option<(Writable, Reg)> { @@ -1644,7 +1671,7 @@ impl MachInst for Inst { } fn is_epilogue_placeholder(&self) -> bool { - if let Inst::EpiloguePlaceholder { .. } = self { + if let Inst::EpiloguePlaceholder = self { true } else { false @@ -1653,7 +1680,7 @@ impl MachInst for Inst { fn is_term<'a>(&'a self) -> MachTerminator<'a> { match self { - &Inst::Ret {} | &Inst::EpiloguePlaceholder {} => MachTerminator::Ret, + &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret, &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()), &Inst::CondBr { taken, not_taken, .. @@ -1687,7 +1714,7 @@ impl MachInst for Inst { } fn gen_zero_len_nop() -> Inst { - Inst::Nop + Inst::Nop0 } fn gen_nop(preferred_size: usize) -> Inst { @@ -1704,7 +1731,6 @@ impl MachInst for Inst { match ty { I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => RegClass::I64, F32 | F64 => RegClass::V128, - I128 | B128 => RegClass::V128, IFLAGS | FFLAGS => RegClass::I64, _ => panic!("Unexpected SSA-value type: {}", ty), } @@ -1750,7 +1776,7 @@ impl MachInst for Inst { if taken.as_block_index() == fallthrough && not_taken.as_block_index() == fallthrough { - *self = Inst::Nop; + *self = Inst::Nop0; } else if taken.as_block_index() == fallthrough { *self = Inst::CondBrLowered { target: not_taken, @@ -1772,7 +1798,7 @@ impl MachInst for Inst { } &mut Inst::Jump { dest } => { if dest.as_block_index() == fallthrough { - *self = Inst::Nop; + *self = Inst::Nop0; } } _ => {} @@ -1831,55 +1857,55 @@ fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (Str impl ShowWithRRU for Inst { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - fn op_is32(alu_op: ALUOp) -> (&'static str, bool) { + fn op_name_size(alu_op: ALUOp) -> (&'static str, InstSize) { match alu_op { - ALUOp::Add32 => ("add", true), - ALUOp::Add64 => ("add", false), - ALUOp::Sub32 => ("sub", true), - ALUOp::Sub64 => ("sub", false), - ALUOp::Orr32 => ("orr", true), - ALUOp::Orr64 => ("orr", false), - ALUOp::And32 => ("and", true), - ALUOp::And64 => ("and", false), - ALUOp::Eor32 => ("eor", true), - ALUOp::Eor64 => ("eor", false), - ALUOp::AddS32 => ("adds", true), - ALUOp::AddS64 => ("adds", false), - ALUOp::SubS32 => ("subs", true), - ALUOp::SubS64 => ("subs", false), - ALUOp::MAdd32 => ("madd", true), - ALUOp::MAdd64 => ("madd", false), - ALUOp::MSub32 => ("msub", true), - ALUOp::MSub64 => ("msub", false), - ALUOp::SMulH => ("smulh", false), - ALUOp::UMulH => ("umulh", false), - ALUOp::SDiv64 => ("sdiv", false), - ALUOp::UDiv64 => ("udiv", false), - ALUOp::AndNot32 => ("bic", true), - ALUOp::AndNot64 => ("bic", false), - ALUOp::OrrNot32 => ("orn", true), - ALUOp::OrrNot64 => ("orn", false), - ALUOp::EorNot32 => ("eon", true), - ALUOp::EorNot64 => ("eon", false), - ALUOp::RotR32 => ("ror", true), - ALUOp::RotR64 => ("ror", false), - ALUOp::Lsr32 => ("lsr", true), - ALUOp::Lsr64 => ("lsr", false), - ALUOp::Asr32 => ("asr", true), - ALUOp::Asr64 => ("asr", false), - ALUOp::Lsl32 => ("lsl", true), 
- ALUOp::Lsl64 => ("lsl", false), + ALUOp::Add32 => ("add", InstSize::Size32), + ALUOp::Add64 => ("add", InstSize::Size64), + ALUOp::Sub32 => ("sub", InstSize::Size32), + ALUOp::Sub64 => ("sub", InstSize::Size64), + ALUOp::Orr32 => ("orr", InstSize::Size32), + ALUOp::Orr64 => ("orr", InstSize::Size64), + ALUOp::And32 => ("and", InstSize::Size32), + ALUOp::And64 => ("and", InstSize::Size64), + ALUOp::Eor32 => ("eor", InstSize::Size32), + ALUOp::Eor64 => ("eor", InstSize::Size64), + ALUOp::AddS32 => ("adds", InstSize::Size32), + ALUOp::AddS64 => ("adds", InstSize::Size64), + ALUOp::SubS32 => ("subs", InstSize::Size32), + ALUOp::SubS64 => ("subs", InstSize::Size64), + ALUOp::MAdd32 => ("madd", InstSize::Size32), + ALUOp::MAdd64 => ("madd", InstSize::Size64), + ALUOp::MSub32 => ("msub", InstSize::Size32), + ALUOp::MSub64 => ("msub", InstSize::Size64), + ALUOp::SMulH => ("smulh", InstSize::Size64), + ALUOp::UMulH => ("umulh", InstSize::Size64), + ALUOp::SDiv64 => ("sdiv", InstSize::Size64), + ALUOp::UDiv64 => ("udiv", InstSize::Size64), + ALUOp::AndNot32 => ("bic", InstSize::Size32), + ALUOp::AndNot64 => ("bic", InstSize::Size64), + ALUOp::OrrNot32 => ("orn", InstSize::Size32), + ALUOp::OrrNot64 => ("orn", InstSize::Size64), + ALUOp::EorNot32 => ("eon", InstSize::Size32), + ALUOp::EorNot64 => ("eon", InstSize::Size64), + ALUOp::RotR32 => ("ror", InstSize::Size32), + ALUOp::RotR64 => ("ror", InstSize::Size64), + ALUOp::Lsr32 => ("lsr", InstSize::Size32), + ALUOp::Lsr64 => ("lsr", InstSize::Size64), + ALUOp::Asr32 => ("asr", InstSize::Size32), + ALUOp::Asr64 => ("asr", InstSize::Size64), + ALUOp::Lsl32 => ("lsl", InstSize::Size32), + ALUOp::Lsl64 => ("lsl", InstSize::Size64), } } match self { - &Inst::Nop => "nop-zero-len".to_string(), + &Inst::Nop0 => "nop-zero-len".to_string(), &Inst::Nop4 => "nop".to_string(), &Inst::AluRRR { alu_op, rd, rn, rm } => { - let (op, is32) = op_is32(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); - let rm = show_ireg_sized(rm, mb_rru, is32); + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::AluRRRR { @@ -1889,12 +1915,12 @@ impl ShowWithRRU for Inst { rm, ra, } => { - let (op, is32) = op_is32(alu_op); + let (op, size) = op_name_size(alu_op); let four_args = alu_op != ALUOp::SMulH && alu_op != ALUOp::UMulH; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); - let rm = show_ireg_sized(rm, mb_rru, is32); - let ra = show_ireg_sized(ra, mb_rru, is32); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); + let ra = show_ireg_sized(ra, mb_rru, size); if four_args { format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } else { @@ -1909,9 +1935,9 @@ impl ShowWithRRU for Inst { rn, ref imm12, } => { - let (op, is32) = op_is32(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); if imm12.bits == 0 && alu_op == ALUOp::Add64 { // special-case MOV (used for moving into SP). 
@@ -1927,9 +1953,9 @@ impl ShowWithRRU for Inst { rn, ref imml, } => { - let (op, is32) = op_is32(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); let imml = imml.show_rru(mb_rru); format!("{} {}, {}, {}", op, rd, rn, imml) } @@ -1939,9 +1965,9 @@ impl ShowWithRRU for Inst { rn, ref immshift, } => { - let (op, is32) = op_is32(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); let immshift = immshift.show_rru(mb_rru); format!("{} {}, {}, {}", op, rd, rn, immshift) } @@ -1952,10 +1978,10 @@ impl ShowWithRRU for Inst { rm, ref shiftop, } => { - let (op, is32) = op_is32(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); - let rm = show_ireg_sized(rm, mb_rru, is32); + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); let shiftop = shiftop.show_rru(mb_rru); format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop) } @@ -1966,18 +1992,18 @@ impl ShowWithRRU for Inst { rm, ref extendop, } => { - let (op, is32) = op_is32(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); - let rm = show_ireg_sized(rm, mb_rru, is32); + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); let extendop = extendop.show_rru(mb_rru); format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop) } &Inst::BitRR { op, rd, rn } => { - let is32 = op.is_32_bit(); + let size = op.inst_size(); let op = op.op_str(); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_ireg_sized(rn, mb_rru, is32); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); format!("{} {}, {}", op, rd, rn) } &Inst::ULoad8 { @@ -2022,24 +2048,24 @@ impl ShowWithRRU for Inst { &MemArg::Unscaled(..) => true, _ => false, }; - let (op, is32) = match (self, is_unscaled) { - (&Inst::ULoad8 { .. }, false) => ("ldrb", true), - (&Inst::ULoad8 { .. }, true) => ("ldurb", true), - (&Inst::SLoad8 { .. }, false) => ("ldrsb", false), - (&Inst::SLoad8 { .. }, true) => ("ldursb", false), - (&Inst::ULoad16 { .. }, false) => ("ldrh", true), - (&Inst::ULoad16 { .. }, true) => ("ldurh", true), - (&Inst::SLoad16 { .. }, false) => ("ldrsh", false), - (&Inst::SLoad16 { .. }, true) => ("ldursh", false), - (&Inst::ULoad32 { .. }, false) => ("ldr", true), - (&Inst::ULoad32 { .. }, true) => ("ldur", true), - (&Inst::SLoad32 { .. }, false) => ("ldrsw", false), - (&Inst::SLoad32 { .. }, true) => ("ldursw", false), - (&Inst::ULoad64 { .. }, false) => ("ldr", false), - (&Inst::ULoad64 { .. }, true) => ("ldur", false), + let (op, size) = match (self, is_unscaled) { + (&Inst::ULoad8 { .. }, false) => ("ldrb", InstSize::Size32), + (&Inst::ULoad8 { .. }, true) => ("ldurb", InstSize::Size32), + (&Inst::SLoad8 { .. }, false) => ("ldrsb", InstSize::Size64), + (&Inst::SLoad8 { .. }, true) => ("ldursb", InstSize::Size64), + (&Inst::ULoad16 { .. 
}, false) => ("ldrh", InstSize::Size32), + (&Inst::ULoad16 { .. }, true) => ("ldurh", InstSize::Size32), + (&Inst::SLoad16 { .. }, false) => ("ldrsh", InstSize::Size64), + (&Inst::SLoad16 { .. }, true) => ("ldursh", InstSize::Size64), + (&Inst::ULoad32 { .. }, false) => ("ldr", InstSize::Size32), + (&Inst::ULoad32 { .. }, true) => ("ldur", InstSize::Size32), + (&Inst::SLoad32 { .. }, false) => ("ldrsw", InstSize::Size64), + (&Inst::SLoad32 { .. }, true) => ("ldursw", InstSize::Size64), + (&Inst::ULoad64 { .. }, false) => ("ldr", InstSize::Size64), + (&Inst::ULoad64 { .. }, true) => ("ldur", InstSize::Size64), _ => unreachable!(), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op, rd, mem) } @@ -2070,18 +2096,18 @@ impl ShowWithRRU for Inst { &MemArg::Unscaled(..) => true, _ => false, }; - let (op, is32) = match (self, is_unscaled) { - (&Inst::Store8 { .. }, false) => ("strb", true), - (&Inst::Store8 { .. }, true) => ("sturb", true), - (&Inst::Store16 { .. }, false) => ("strh", true), - (&Inst::Store16 { .. }, true) => ("sturh", true), - (&Inst::Store32 { .. }, false) => ("str", true), - (&Inst::Store32 { .. }, true) => ("stur", true), - (&Inst::Store64 { .. }, false) => ("str", false), - (&Inst::Store64 { .. }, true) => ("stur", false), + let (op, size) = match (self, is_unscaled) { + (&Inst::Store8 { .. }, false) => ("strb", InstSize::Size32), + (&Inst::Store8 { .. }, true) => ("sturb", InstSize::Size32), + (&Inst::Store16 { .. }, false) => ("strh", InstSize::Size32), + (&Inst::Store16 { .. }, true) => ("sturh", InstSize::Size32), + (&Inst::Store32 { .. }, false) => ("str", InstSize::Size32), + (&Inst::Store32 { .. }, true) => ("stur", InstSize::Size32), + (&Inst::Store64 { .. }, false) => ("str", InstSize::Size64), + (&Inst::Store64 { .. 
}, true) => ("stur", InstSize::Size64), _ => unreachable!(), }; - let rd = show_ireg_sized(rd, mb_rru, is32); + let rd = show_ireg_sized(rd, mb_rru, size); let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op, rd, mem) } @@ -2103,8 +2129,8 @@ impl ShowWithRRU for Inst { format!("mov {}, {}", rd, rm) } &Inst::Mov32 { rd, rm } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); - let rm = show_ireg_sized(rm, mb_rru, /* is32 = */ true); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, InstSize::Size32); + let rm = show_ireg_sized(rm, mb_rru, InstSize::Size32); format!("mov {}, {}", rd, rm) } &Inst::MovZ { rd, ref imm } => { @@ -2140,38 +2166,38 @@ impl ShowWithRRU for Inst { format!("mov {}.8b, {}.8b", rd, rn) } &Inst::FpuRR { fpu_op, rd, rn } => { - let (op, is32src, is32dst) = match fpu_op { - FPUOp1::Abs32 => ("fabs", true, true), - FPUOp1::Abs64 => ("fabs", false, false), - FPUOp1::Neg32 => ("fneg", true, true), - FPUOp1::Neg64 => ("fneg", false, false), - FPUOp1::Sqrt32 => ("fsqrt", true, true), - FPUOp1::Sqrt64 => ("fsqrt", false, false), - FPUOp1::Cvt32To64 => ("fcvt", true, false), - FPUOp1::Cvt64To32 => ("fcvt", false, true), + let (op, sizesrc, sizedest) = match fpu_op { + FPUOp1::Abs32 => ("fabs", InstSize::Size32, InstSize::Size32), + FPUOp1::Abs64 => ("fabs", InstSize::Size64, InstSize::Size64), + FPUOp1::Neg32 => ("fneg", InstSize::Size32, InstSize::Size32), + FPUOp1::Neg64 => ("fneg", InstSize::Size64, InstSize::Size64), + FPUOp1::Sqrt32 => ("fsqrt", InstSize::Size32, InstSize::Size32), + FPUOp1::Sqrt64 => ("fsqrt", InstSize::Size64, InstSize::Size64), + FPUOp1::Cvt32To64 => ("fcvt", InstSize::Size32, InstSize::Size64), + FPUOp1::Cvt64To32 => ("fcvt", InstSize::Size64, InstSize::Size32), }; - let rd = show_freg_sized(rd.to_reg(), mb_rru, is32dst); - let rn = show_freg_sized(rn, mb_rru, is32src); + let rd = show_freg_sized(rd.to_reg(), mb_rru, sizedest); + let rn = show_freg_sized(rn, mb_rru, sizesrc); format!("{} {}, {}", op, rd, rn) } &Inst::FpuRRR { fpu_op, rd, rn, rm } => { - let (op, is32) = match fpu_op { - FPUOp2::Add32 => ("fadd", true), - FPUOp2::Add64 => ("fadd", false), - FPUOp2::Sub32 => ("fsub", true), - FPUOp2::Sub64 => ("fsub", false), - FPUOp2::Mul32 => ("fmul", true), - FPUOp2::Mul64 => ("fmul", false), - FPUOp2::Div32 => ("fdiv", true), - FPUOp2::Div64 => ("fdiv", false), - FPUOp2::Max32 => ("fmax", true), - FPUOp2::Max64 => ("fmax", false), - FPUOp2::Min32 => ("fmin", true), - FPUOp2::Min64 => ("fmin", false), + let (op, size) = match fpu_op { + FPUOp2::Add32 => ("fadd", InstSize::Size32), + FPUOp2::Add64 => ("fadd", InstSize::Size64), + FPUOp2::Sub32 => ("fsub", InstSize::Size32), + FPUOp2::Sub64 => ("fsub", InstSize::Size64), + FPUOp2::Mul32 => ("fmul", InstSize::Size32), + FPUOp2::Mul64 => ("fmul", InstSize::Size64), + FPUOp2::Div32 => ("fdiv", InstSize::Size32), + FPUOp2::Div64 => ("fdiv", InstSize::Size64), + FPUOp2::Max32 => ("fmax", InstSize::Size32), + FPUOp2::Max64 => ("fmax", InstSize::Size64), + FPUOp2::Min32 => ("fmin", InstSize::Size32), + FPUOp2::Min64 => ("fmin", InstSize::Size64), }; - let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_freg_sized(rn, mb_rru, is32); - let rm = show_freg_sized(rm, mb_rru, is32); + let rd = show_freg_sized(rd.to_reg(), mb_rru, size); + let rn = show_freg_sized(rn, mb_rru, size); + let rm = show_freg_sized(rm, mb_rru, size); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::FpuRRRR { @@ -2181,33 +2207,33 @@ impl ShowWithRRU for Inst { rm, ra, } => { - let (op, 
is32) = match fpu_op { - FPUOp3::MAdd32 => ("fmadd", true), - FPUOp3::MAdd64 => ("fmadd", false), + let (op, size) = match fpu_op { + FPUOp3::MAdd32 => ("fmadd", InstSize::Size32), + FPUOp3::MAdd64 => ("fmadd", InstSize::Size64), }; - let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_freg_sized(rn, mb_rru, is32); - let rm = show_freg_sized(rm, mb_rru, is32); - let ra = show_freg_sized(ra, mb_rru, is32); + let rd = show_freg_sized(rd.to_reg(), mb_rru, size); + let rn = show_freg_sized(rn, mb_rru, size); + let rm = show_freg_sized(rm, mb_rru, size); + let ra = show_freg_sized(ra, mb_rru, size); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } &Inst::FpuCmp32 { rn, rm } => { - let rn = show_freg_sized(rn, mb_rru, /* is32 = */ true); - let rm = show_freg_sized(rm, mb_rru, /* is32 = */ true); + let rn = show_freg_sized(rn, mb_rru, InstSize::Size32); + let rm = show_freg_sized(rm, mb_rru, InstSize::Size32); format!("fcmp {}, {}", rn, rm) } &Inst::FpuCmp64 { rn, rm } => { - let rn = show_freg_sized(rn, mb_rru, /* is32 = */ false); - let rm = show_freg_sized(rm, mb_rru, /* is32 = */ false); + let rn = show_freg_sized(rn, mb_rru, InstSize::Size64); + let rm = show_freg_sized(rm, mb_rru, InstSize::Size64); format!("fcmp {}, {}", rn, rm) } &Inst::FpuLoad32 { rd, ref mem, .. } => { - let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size32); let mem = mem.show_rru_sized(mb_rru, /* size = */ 4); format!("ldr {}, {}", rd, mem) } &Inst::FpuLoad64 { rd, ref mem, .. } => { - let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size64); let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); format!("ldr {}, {}", rd, mem) } @@ -2218,12 +2244,12 @@ impl ShowWithRRU for Inst { format!("ldr {}, {}", rd, mem) } &Inst::FpuStore32 { rd, ref mem, .. } => { - let rd = show_freg_sized(rd, mb_rru, /* is32 = */ true); + let rd = show_freg_sized(rd, mb_rru, InstSize::Size32); let mem = mem.show_rru_sized(mb_rru, /* size = */ 4); format!("str {}, {}", rd, mem) } &Inst::FpuStore64 { rd, ref mem, .. 
} => { - let rd = show_freg_sized(rd, mb_rru, /* is32 = */ false); + let rd = show_freg_sized(rd, mb_rru, InstSize::Size64); let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); format!("str {}, {}", rd, mem) } @@ -2234,70 +2260,70 @@ impl ShowWithRRU for Inst { format!("str {}, {}", rd, mem) } &Inst::LoadFpuConst32 { rd, const_data } => { - let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size32); format!("ldr {}, pc+8 ; b 8 ; data.f32 {}", rd, const_data) } &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size64); format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, const_data) } &Inst::FpuToInt { op, rd, rn } => { - let (op, is32src, is32dest) = match op { - FpuToIntOp::F32ToI32 => ("fcvtzs", true, true), - FpuToIntOp::F32ToU32 => ("fcvtzu", true, true), - FpuToIntOp::F32ToI64 => ("fcvtzs", true, false), - FpuToIntOp::F32ToU64 => ("fcvtzu", true, false), - FpuToIntOp::F64ToI32 => ("fcvtzs", false, true), - FpuToIntOp::F64ToU32 => ("fcvtzu", false, true), - FpuToIntOp::F64ToI64 => ("fcvtzs", false, false), - FpuToIntOp::F64ToU64 => ("fcvtzu", false, false), + let (op, sizesrc, sizedest) = match op { + FpuToIntOp::F32ToI32 => ("fcvtzs", InstSize::Size32, InstSize::Size32), + FpuToIntOp::F32ToU32 => ("fcvtzu", InstSize::Size32, InstSize::Size32), + FpuToIntOp::F32ToI64 => ("fcvtzs", InstSize::Size32, InstSize::Size64), + FpuToIntOp::F32ToU64 => ("fcvtzu", InstSize::Size32, InstSize::Size64), + FpuToIntOp::F64ToI32 => ("fcvtzs", InstSize::Size64, InstSize::Size32), + FpuToIntOp::F64ToU32 => ("fcvtzu", InstSize::Size64, InstSize::Size32), + FpuToIntOp::F64ToI64 => ("fcvtzs", InstSize::Size64, InstSize::Size64), + FpuToIntOp::F64ToU64 => ("fcvtzu", InstSize::Size64, InstSize::Size64), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32dest); - let rn = show_freg_sized(rn, mb_rru, is32src); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest); + let rn = show_freg_sized(rn, mb_rru, sizesrc); format!("{} {}, {}", op, rd, rn) } &Inst::IntToFpu { op, rd, rn } => { - let (op, is32src, is32dest) = match op { - IntToFpuOp::I32ToF32 => ("scvtf", true, true), - IntToFpuOp::U32ToF32 => ("ucvtf", true, true), - IntToFpuOp::I64ToF32 => ("scvtf", false, true), - IntToFpuOp::U64ToF32 => ("ucvtf", false, true), - IntToFpuOp::I32ToF64 => ("scvtf", true, false), - IntToFpuOp::U32ToF64 => ("ucvtf", true, false), - IntToFpuOp::I64ToF64 => ("scvtf", false, false), - IntToFpuOp::U64ToF64 => ("ucvtf", false, false), + let (op, sizesrc, sizedest) = match op { + IntToFpuOp::I32ToF32 => ("scvtf", InstSize::Size32, InstSize::Size32), + IntToFpuOp::U32ToF32 => ("ucvtf", InstSize::Size32, InstSize::Size32), + IntToFpuOp::I64ToF32 => ("scvtf", InstSize::Size64, InstSize::Size32), + IntToFpuOp::U64ToF32 => ("ucvtf", InstSize::Size64, InstSize::Size32), + IntToFpuOp::I32ToF64 => ("scvtf", InstSize::Size32, InstSize::Size64), + IntToFpuOp::U32ToF64 => ("ucvtf", InstSize::Size32, InstSize::Size64), + IntToFpuOp::I64ToF64 => ("scvtf", InstSize::Size64, InstSize::Size64), + IntToFpuOp::U64ToF64 => ("ucvtf", InstSize::Size64, InstSize::Size64), }; - let rd = show_freg_sized(rd.to_reg(), mb_rru, is32dest); - let rn = show_ireg_sized(rn, mb_rru, is32src); + let rd = show_freg_sized(rd.to_reg(), mb_rru, sizedest); + let rn = show_ireg_sized(rn, mb_rru, sizesrc); format!("{} {}, {}", op, rd, rn) } &Inst::FpuCSel32 { rd, rn, rm, 
cond } => { - let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); - let rn = show_freg_sized(rn, mb_rru, /* is32 = */ true); - let rm = show_freg_sized(rm, mb_rru, /* is32 = */ true); + let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size32); + let rn = show_freg_sized(rn, mb_rru, InstSize::Size32); + let rm = show_freg_sized(rm, mb_rru, InstSize::Size32); let cond = cond.show_rru(mb_rru); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuCSel64 { rd, rn, rm, cond } => { - let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); - let rn = show_freg_sized(rn, mb_rru, /* is32 = */ false); - let rm = show_freg_sized(rm, mb_rru, /* is32 = */ false); + let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size64); + let rn = show_freg_sized(rn, mb_rru, InstSize::Size64); + let rm = show_freg_sized(rm, mb_rru, InstSize::Size64); let cond = cond.show_rru(mb_rru); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuRound { op, rd, rn } => { - let (inst, is32) = match op { - FpuRoundMode::Minus32 => ("frintm", true), - FpuRoundMode::Minus64 => ("frintm", false), - FpuRoundMode::Plus32 => ("frintp", true), - FpuRoundMode::Plus64 => ("frintp", false), - FpuRoundMode::Zero32 => ("frintz", true), - FpuRoundMode::Zero64 => ("frintz", false), - FpuRoundMode::Nearest32 => ("frintn", true), - FpuRoundMode::Nearest64 => ("frintn", false), + let (inst, size) = match op { + FpuRoundMode::Minus32 => ("frintm", InstSize::Size32), + FpuRoundMode::Minus64 => ("frintm", InstSize::Size64), + FpuRoundMode::Plus32 => ("frintp", InstSize::Size32), + FpuRoundMode::Plus64 => ("frintp", InstSize::Size64), + FpuRoundMode::Zero32 => ("frintz", InstSize::Size32), + FpuRoundMode::Zero64 => ("frintz", InstSize::Size64), + FpuRoundMode::Nearest32 => ("frintn", InstSize::Size32), + FpuRoundMode::Nearest64 => ("frintn", InstSize::Size64), }; - let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); - let rn = show_freg_sized(rn, mb_rru, is32); + let rd = show_freg_sized(rd.to_reg(), mb_rru, size); + let rn = show_freg_sized(rn, mb_rru, size); format!("{} {}, {}", inst, rd, rn) } &Inst::MovToVec64 { rd, rn } => { @@ -2346,13 +2372,13 @@ impl ShowWithRRU for Inst { // extend-to width is <= 32 bits, *unless* we have an unsigned // 32-to-64-bit extension, which is implemented with a "mov" to a // 32-bit (W-reg) dest, because this zeroes the top 32 bits. 
- let dest_is32 = if !signed && from_bits == 32 && to_bits == 64 { - true + let dest_size = if !signed && from_bits == 32 && to_bits == 64 { + InstSize::Size32 } else { - to_bits <= 32 + InstSize::from_bits(to_bits) }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_is32); - let rn = show_ireg_sized(rn, mb_rru, from_bits <= 32); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); + let rn = show_ireg_sized(rn, mb_rru, InstSize::from_bits(from_bits)); let op = match (signed, from_bits, to_bits) { (false, 8, 32) => "uxtb", (true, 8, 32) => "sxtb", @@ -2375,11 +2401,11 @@ impl ShowWithRRU for Inst { from_bits, to_bits, } if from_bits == 1 && signed => { - let dest_is32 = to_bits <= 32; - let zr = if dest_is32 { "wzr" } else { "xzr" }; - let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_is32); - let rn = show_ireg_sized(rn, mb_rru, /* is32 = */ true); + let dest_size = InstSize::from_bits(to_bits); + let zr = if dest_size.is32() { "wzr" } else { "xzr" }; + let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, InstSize::Size32); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); + let rn = show_ireg_sized(rn, mb_rru, InstSize::Size32); format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd) } &Inst::Extend { @@ -2389,8 +2415,8 @@ impl ShowWithRRU for Inst { from_bits, .. } if from_bits == 1 && !signed => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); - let rn = show_ireg_sized(rn, mb_rru, /* is32 = */ true); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, InstSize::Size32); + let rn = show_ireg_sized(rn, mb_rru, InstSize::Size32); format!("and {}, {}, #1", rd, rn) } &Inst::Extend { .. } => { @@ -2401,8 +2427,8 @@ impl ShowWithRRU for Inst { let rn = rn.show_rru(mb_rru); format!("blr {}", rn) } - &Inst::Ret {} => "ret".to_string(), - &Inst::EpiloguePlaceholder {} => "epilogue placeholder".to_string(), + &Inst::Ret => "ret".to_string(), + &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), &Inst::Jump { ref dest } => { let dest = dest.show_rru(mb_rru); format!("b {}", dest) diff --git a/cranelift/codegen/src/isa/arm64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs similarity index 90% rename from cranelift/codegen/src/isa/arm64/inst/regs.rs rename to cranelift/codegen/src/isa/aarch64/inst/regs.rs index 31a915410a..b675d7f4d7 100644 --- a/cranelift/codegen/src/isa/arm64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -1,13 +1,9 @@ -//! ARM64 ISA definitions: registers. - -#![allow(dead_code)] +//! AArch64 ISA definitions: registers. +use crate::isa::aarch64::inst::InstSize; use crate::machinst::*; -use regalloc::{ - RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, - NUM_REG_CLASSES, -}; +use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES}; use std::string::{String, ToString}; @@ -83,7 +79,7 @@ pub fn writable_zero_reg() -> Writable { /// Get a reference to the stack-pointer register. pub fn stack_reg() -> Reg { // XSP (stack) and XZR (zero) are logically different registers which have - // the same hardware encoding, and whose meaning, in real arm64 + // the same hardware encoding, and whose meaning, in real aarch64 // instructions, is context-dependent. For convenience of // universe-construction and for correct printing, we make them be two // different real registers. 
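Note on the pretty-printing and register-display hunks above: they replace the old `is32: bool` flags with an `InstSize` operand-size type. Its definition is introduced elsewhere in this patch (in `isa/aarch64/inst`) and is not shown in these hunks; the following is only a minimal sketch consistent with the uses visible here (`Size32`/`Size64`, `from_bits`, `is32`), not the patch's actual definition. Carrying an enum rather than a bare bool keeps call sites such as `show_ireg_sized(reg, mb_rru, size)` self-describing.

// Sketch only: the real type lives in isa/aarch64/inst and may differ in detail.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
    Size32,
    Size64,
}

impl InstSize {
    /// Classify a bit width: up to 32 bits is treated as a 32-bit (W-register)
    /// operation, anything wider as a 64-bit (X-register) operation.
    pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
        let bits: usize = bits.into();
        assert!(bits <= 64);
        if bits <= 32 {
            InstSize::Size32
        } else {
            InstSize::Size64
        }
    }

    pub fn is32(self) -> bool {
        self == InstSize::Size32
    }
}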
@@ -134,7 +130,7 @@ pub fn writable_spilltmp_reg() -> Writable { Writable::from_reg(spilltmp_reg()) } -/// Create the register universe for ARM64. +/// Create the register universe for AArch64. pub fn create_reg_universe() -> RealRegUniverse { let mut regs = vec![]; let mut allocable_by_class = [None; NUM_REG_CLASSES]; @@ -217,37 +213,38 @@ pub fn create_reg_universe() -> RealRegUniverse { } } -/// If |ireg| denotes an I64-classed reg, make a best-effort attempt to show +/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show /// its name at the 32-bit size. -pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String { +pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String { let mut s = reg.show_rru(mb_rru); - if reg.get_class() != RegClass::I64 || !is32 { + if reg.get_class() != RegClass::I64 || !size.is32() { // We can't do any better. return s; } if reg.is_real() { // Change (eg) "x42" into "w42" as appropriate - if reg.get_class() == RegClass::I64 && is32 && s.starts_with("x") { + if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") { s = "w".to_string() + &s[1..]; } } else { // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role - if reg.get_class() == RegClass::I64 && is32 { - s = s + &"w"; + if reg.get_class() == RegClass::I64 && size.is32() { + s.push('w'); } } s } /// Show a vector register when its use as a 32-bit or 64-bit float is known. -pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String { - let s = reg.show_rru(mb_rru); +pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String { + let mut s = reg.show_rru(mb_rru); if reg.get_class() != RegClass::V128 { return s; } - let prefix = if is32 { "s" } else { "d" }; - prefix.to_string() + &s[1..] + let prefix = if size.is32() { "s" } else { "d" }; + s.replace_range(0..1, prefix); + s } /// Show a vector register used in a scalar context. @@ -261,12 +258,12 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String { if reg.is_real() { // Change (eg) "v0" into "d0". if reg.get_class() == RegClass::V128 && s.starts_with("v") { - s = "d".to_string() + &s[1..]; + s.replace_range(0..1, "d"); } } else { // Add a "d" suffix to RegClass::V128 vregs. if reg.get_class() == RegClass::V128 { - s = s + &"d"; + s.push('d'); } } s diff --git a/cranelift/codegen/src/isa/arm64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs similarity index 95% rename from cranelift/codegen/src/isa/arm64/lower.rs rename to cranelift/codegen/src/isa/aarch64/lower.rs index 9979802c79..07a8e896e6 100644 --- a/cranelift/codegen/src/isa/arm64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1,4 +1,4 @@ -//! Lowering rules for ARM64. +//! Lowering rules for AArch64. //! //! TODO: opportunities for better code generation: //! @@ -6,45 +6,24 @@ //! and incorporate sign/zero extension on indicies. Recognize pre/post-index //! opportunities. //! -//! - Logical-immediate args. -//! -//! - Floating-point immediates. - -#![allow(dead_code)] +//! - Floating-point immediates (FIMM instruction). 
use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::types::*; use crate::ir::Inst as IRInst; -use crate::ir::{Block, InstructionData, Opcode, TrapCode, Type}; +use crate::ir::{InstructionData, Opcode, TrapCode, Type}; use crate::machinst::lower::*; use crate::machinst::*; -use crate::isa::arm64::abi::*; -use crate::isa::arm64::inst::*; -use crate::isa::arm64::Arm64Backend; +use crate::isa::aarch64::abi::*; +use crate::isa::aarch64::inst::*; +use crate::isa::aarch64::AArch64Backend; use regalloc::{Reg, RegClass, Writable}; use alloc::vec::Vec; use smallvec::SmallVec; -//============================================================================ -// Helpers: opcode conversions - -fn op_to_aluop(op: Opcode, ty: Type) -> Option { - match (op, ty) { - (Opcode::Iadd, I32) => Some(ALUOp::Add32), - (Opcode::Iadd, I64) => Some(ALUOp::Add64), - (Opcode::Isub, I32) => Some(ALUOp::Sub32), - (Opcode::Isub, I64) => Some(ALUOp::Sub64), - _ => None, - } -} - -fn is_alu_op(op: Opcode, ctrl_typevar: Type) -> bool { - op_to_aluop(op, ctrl_typevar).is_some() -} - //============================================================================ // Result enum types. // @@ -163,7 +142,7 @@ impl InsnInputSource { } } -fn get_input>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput { +fn get_input>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput { assert!(num <= ctx.num_inputs(output.insn)); InsnInput { insn: output.insn, @@ -173,7 +152,7 @@ fn get_input>(ctx: &mut C, output: InsnOutput, num: usize) -> /// Convert an instruction input to a producing instruction's output if possible (in same BB), or a /// register otherwise. -fn input_source>(ctx: &mut C, input: InsnInput) -> InsnInputSource { +fn input_source>(ctx: &mut C, input: InsnInput) -> InsnInputSource { if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) { let out = InsnOutput { insn: input_inst, @@ -190,7 +169,7 @@ fn input_source>(ctx: &mut C, input: InsnInput) -> InsnInputSo // Lowering: convert instruction outputs to result types. /// Lower an instruction output to a 64-bit constant, if possible. -fn output_to_const>(ctx: &mut C, out: InsnOutput) -> Option { +fn output_to_const>(ctx: &mut C, out: InsnOutput) -> Option { if out.output > 0 { None } else { @@ -204,7 +183,7 @@ fn output_to_const>(ctx: &mut C, out: InsnOutput) -> Option Some(imm.bits() as u64), + &InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())), &InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()), _ => None, } @@ -212,16 +191,19 @@ fn output_to_const>(ctx: &mut C, out: InsnOutput) -> Option>(ctx: &mut C, out: InsnOutput) -> Option { +fn output_to_const_f32>(ctx: &mut C, out: InsnOutput) -> Option { output_to_const(ctx, out).map(|value| f32::from_bits(value as u32)) } -fn output_to_const_f64>(ctx: &mut C, out: InsnOutput) -> Option { +fn output_to_const_f64>(ctx: &mut C, out: InsnOutput) -> Option { output_to_const(ctx, out).map(|value| f64::from_bits(value)) } /// Lower an instruction output to a constant register-shift amount, if possible. -fn output_to_shiftimm>(ctx: &mut C, out: InsnOutput) -> Option { +fn output_to_shiftimm>( + ctx: &mut C, + out: InsnOutput, +) -> Option { output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift) } @@ -251,7 +233,7 @@ impl NarrowValueMode { } /// Lower an instruction output to a reg. 
-fn output_to_reg>(ctx: &mut C, out: InsnOutput) -> Writable { +fn output_to_reg>(ctx: &mut C, out: InsnOutput) -> Writable { ctx.output(out.insn, out.output) } @@ -260,7 +242,7 @@ fn output_to_reg>(ctx: &mut C, out: InsnOutput) -> Writable>( +fn input_to_reg>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, @@ -292,9 +274,7 @@ fn input_to_reg>( }); tmp.to_reg() } - (NarrowValueMode::ZeroExtend32, n) | (NarrowValueMode::SignExtend32, n) if n == 32 => { - in_reg - } + (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg, (NarrowValueMode::ZeroExtend64, n) if n < 64 => { let tmp = ctx.tmp(RegClass::I64, I32); @@ -318,7 +298,7 @@ fn input_to_reg>( }); tmp.to_reg() } - (_, n) if n == 64 => in_reg, + (_, 64) => in_reg, _ => panic!( "Unsupported input width: input ty {} bits {} mode {:?}", @@ -340,7 +320,7 @@ fn input_to_reg>( /// divide or a right-shift or a compare-to-zero), `narrow_mode` should be /// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting /// register will be provided the extended value. -fn input_to_rs>( +fn input_to_rs>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, @@ -374,7 +354,7 @@ fn input_to_rs>( /// vreg into which the source instruction will generate its value. /// /// See note on `input_to_rs` for a description of `narrow_mode`. -fn input_to_rse>( +fn input_to_rse>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, @@ -448,7 +428,7 @@ fn input_to_rse>( ResultRSE::from_rs(input_to_rs(ctx, input, narrow_mode)) } -fn input_to_rse_imm12>( +fn input_to_rse_imm12>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, @@ -465,7 +445,7 @@ fn input_to_rse_imm12>( ResultRSEImm12::from_rse(input_to_rse(ctx, input, narrow_mode)) } -fn input_to_rs_immlogic>( +fn input_to_rs_immlogic>( ctx: &mut C, input: InsnInput, narrow_mode: NarrowValueMode, @@ -484,7 +464,10 @@ fn input_to_rs_immlogic>( ResultRSImmLogic::from_rs(input_to_rs(ctx, input, narrow_mode)) } -fn input_to_reg_immshift>(ctx: &mut C, input: InsnInput) -> ResultRegImmShift { +fn input_to_reg_immshift>( + ctx: &mut C, + input: InsnInput, +) -> ResultRegImmShift { if let InsnInputSource::Output(out) = input_source(ctx, input) { if let Some(imm_value) = output_to_const(ctx, out) { if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) { @@ -577,7 +560,7 @@ fn alu_inst_immshift(op: ALUOp, rd: Writable, rn: Reg, rm: ResultRegImmShif // than an `InsnInput`, to do more introspection. /// Lower the address of a load or store. -fn lower_address>( +fn lower_address>( ctx: &mut C, elem_ty: Type, addends: &[InsnInput], @@ -598,7 +581,7 @@ fn lower_address>( if addends.len() == 2 && offset == 0 { let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64); let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64); - return MemArg::reg_reg(ra, rb); + return MemArg::reg_plus_reg(ra, rb); } // Otherwise, generate add instructions. 
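Note on the `input_to_*` helpers above: they form a small operand-matching layer, with each helper trying progressively richer operand forms (plain register, extended register, shifted register, 12-bit or logical immediate) and falling back to a register when nothing matches. As an orientation aid only, a simplified sketch of how they compose when lowering a two-operand ALU op — assuming the `LowerCtx<Inst>` bound and the `alu_inst_imm12`/`choose_32_64` helpers that appear later in this file — might look like the function below; the real dispatch lives in `lower_insn_to_regs` and covers many more opcodes, types, and operand forms.

// Sketch only; not part of this patch.
fn lower_simple_iadd<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst, ty: Type) {
    let inputs = [
        InsnInput { insn, input: 0 },
        InsnInput { insn, input: 1 },
    ];
    let output = InsnOutput { insn, output: 0 };

    let rd = output_to_reg(ctx, output);
    // First operand always goes in a register; no extension needed for an add.
    let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
    // Second operand may become a register, an extended register, or a 12-bit
    // immediate, whichever form the producing instruction allows.
    let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
    let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
    ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}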
@@ -621,17 +604,17 @@ fn lower_address>( MemArg::reg(addr.to_reg()) } -fn lower_constant_u64>(ctx: &mut C, rd: Writable, value: u64) { +fn lower_constant_u64>(ctx: &mut C, rd: Writable, value: u64) { for inst in Inst::load_constant(rd, value) { ctx.emit(inst); } } -fn lower_constant_f32>(ctx: &mut C, rd: Writable, value: f32) { +fn lower_constant_f32>(ctx: &mut C, rd: Writable, value: f32) { ctx.emit(Inst::load_fp_constant32(rd, value)); } -fn lower_constant_f64>(ctx: &mut C, rd: Writable, value: f64) { +fn lower_constant_f64>(ctx: &mut C, rd: Writable, value: f64) { ctx.emit(Inst::load_fp_constant64(rd, value)); } @@ -653,7 +636,7 @@ fn lower_condcode(cc: IntCC) -> Cond { } fn lower_fp_condcode(cc: FloatCC) -> Cond { - // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` ARM64 docs. + // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs. // The FCMP instruction sets: // NZCV // - PCSR.NZCV = 0011 on UN (unordered), @@ -717,7 +700,7 @@ pub fn condcode_is_signed(cc: IntCC) -> bool { // Top-level instruction lowering entry point, for one instruction. /// Actually codegen an instruction's results into registers. -fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) { +fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) { let op = ctx.data(insn).opcode(); let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) .map(|i| InsnInput { insn, input: i }) @@ -1032,13 +1015,13 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) { Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { let ty = ty.unwrap(); - let is32 = ty_bits(ty) <= 32; - let narrow_mode = match (op, is32) { + let size = InstSize::from_bits(ty_bits(ty)); + let narrow_mode = match (op, size) { (Opcode::Ishl, _) => NarrowValueMode::None, - (Opcode::Ushr, false) => NarrowValueMode::ZeroExtend64, - (Opcode::Ushr, true) => NarrowValueMode::ZeroExtend32, - (Opcode::Sshr, false) => NarrowValueMode::SignExtend64, - (Opcode::Sshr, true) => NarrowValueMode::SignExtend32, + (Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64, + (Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32, + (Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64, + (Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32, _ => unreachable!(), }; let rd = output_to_reg(ctx, outputs[0]); @@ -1160,7 +1143,7 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) { } Opcode::Rotl => { - // ARM64 does not have a ROL instruction, so we always synthesize + // AArch64 does not have a ROL instruction, so we always synthesize // this as: // // rotl rd, rn, rm @@ -1854,26 +1837,17 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) { Opcode::Call => { let extname = ctx.call_target(insn).unwrap(); let extname = extname.clone(); - // HACK: get the function address with an Abs8 reloc in the constant pool. 
- //let tmp = ctx.tmp(RegClass::I64, I64); - //ctx.emit(Inst::LoadExtName { - //rd: tmp, - //name: extname, - //srcloc: loc, - //offset: 0, - //}); let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); - (ARM64ABICall::from_func(sig, &extname, loc), &inputs[..]) - //(ARM64ABICall::from_ptr(sig, tmp.to_reg(), loc), &inputs[..]) + (AArch64ABICall::from_func(sig, &extname, loc), &inputs[..]) } Opcode::CallIndirect => { let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() - 1 == sig.params.len()); assert!(outputs.len() == sig.returns.len()); - (ARM64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..]) + (AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..]) } _ => unreachable!(), }; @@ -2357,21 +2331,6 @@ fn choose_32_64(ty: Type, op32: T, op64: T) -> T { } } -fn branch_target(data: &InstructionData) -> Option { - match data { - &InstructionData::BranchIcmp { destination, .. } - | &InstructionData::Branch { destination, .. } - | &InstructionData::BranchInt { destination, .. } - | &InstructionData::Jump { destination, .. } - | &InstructionData::BranchTable { destination, .. } - | &InstructionData::BranchFloat { destination, .. } => Some(destination), - _ => { - assert!(!data.opcode().is_branch()); - None - } - } -} - fn ldst_offset(data: &InstructionData) -> Option { match data { &InstructionData::Load { offset, .. } @@ -2418,7 +2377,11 @@ fn inst_trapcode(data: &InstructionData) -> Option { } /// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so. -fn maybe_input_insn>(c: &mut C, input: InsnInput, op: Opcode) -> Option { +fn maybe_input_insn>( + c: &mut C, + input: InsnInput, + op: Opcode, +) -> Option { if let InsnInputSource::Output(out) = input_source(c, input) { let data = c.data(out.insn); if data.opcode() == op { @@ -2434,7 +2397,7 @@ fn maybe_input_insn>(c: &mut C, input: InsnInput, op: Opcode) /// /// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it /// a bit more generic. -fn maybe_input_insn_via_conv>( +fn maybe_input_insn_via_conv>( c: &mut C, input: InsnInput, op: Opcode, @@ -2461,7 +2424,7 @@ fn maybe_input_insn_via_conv>( None } -fn lower_icmp_or_ifcmp_to_flags>(ctx: &mut C, insn: IRInst, is_signed: bool) { +fn lower_icmp_or_ifcmp_to_flags>(ctx: &mut C, insn: IRInst, is_signed: bool) { let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); let narrow_mode = match (bits <= 32, is_signed) { @@ -2488,7 +2451,7 @@ fn lower_icmp_or_ifcmp_to_flags>(ctx: &mut C, insn: IRInst, is ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); } -fn lower_fcmp_or_ffcmp_to_flags>(ctx: &mut C, insn: IRInst) { +fn lower_fcmp_or_ffcmp_to_flags>(ctx: &mut C, insn: IRInst) { let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); let inputs = [ @@ -2517,14 +2480,14 @@ fn lower_fcmp_or_ffcmp_to_flags>(ctx: &mut C, insn: IRInst) { //============================================================================= // Lowering-backend trait implementation. 
-impl LowerBackend for Arm64Backend { +impl LowerBackend for AArch64Backend { type MInst = Inst; - fn lower>(&self, ctx: &mut C, ir_inst: IRInst) { + fn lower>(&self, ctx: &mut C, ir_inst: IRInst) { lower_insn_to_regs(ctx, ir_inst); } - fn lower_branch_group>( + fn lower_branch_group>( &self, ctx: &mut C, branches: &[IRInst], diff --git a/cranelift/codegen/src/isa/arm64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs similarity index 78% rename from cranelift/codegen/src/isa/arm64/mod.rs rename to cranelift/codegen/src/isa/aarch64/mod.rs index fb35439332..2a71085929 100644 --- a/cranelift/codegen/src/isa/arm64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -2,7 +2,6 @@ use crate::ir::Function; use crate::isa::Builder as IsaBuilder; -use crate::isa::TargetIsa; use crate::machinst::{ compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode, }; @@ -10,10 +9,9 @@ use crate::result::CodegenResult; use crate::settings; use alloc::boxed::Box; -use std::str::FromStr; use regalloc::RealRegUniverse; -use target_lexicon::Triple; +use target_lexicon::{Aarch64Architecture, Architecture, Triple}; // New backend: mod abi; @@ -22,29 +20,30 @@ mod lower; use inst::create_reg_universe; -/// An ARM64 backend. -pub struct Arm64Backend { +/// An AArch64 backend. +pub struct AArch64Backend { + triple: Triple, flags: settings::Flags, } -impl Arm64Backend { - /// Create a new ARM64 backend with the given (shared) flags. - pub fn new_with_flags(flags: settings::Flags) -> Arm64Backend { - Arm64Backend { flags } +impl AArch64Backend { + /// Create a new AArch64 backend with the given (shared) flags. + pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend { + AArch64Backend { triple, flags } } - fn compile_vcode(&self, mut func: Function, flags: &settings::Flags) -> VCode { + fn compile_vcode(&self, func: &Function, flags: &settings::Flags) -> VCode { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. - let abi = Box::new(abi::ARM64ABIBody::new(&func)); - compile::compile::(&mut func, self, abi, flags) + let abi = Box::new(abi::AArch64ABIBody::new(func)); + compile::compile::(func, self, abi, flags) } } -impl MachBackend for Arm64Backend { +impl MachBackend for AArch64Backend { fn compile_function( &self, - func: Function, + func: &Function, want_disasm: bool, ) -> CodegenResult { let flags = self.flags(); @@ -66,11 +65,11 @@ impl MachBackend for Arm64Backend { } fn name(&self) -> &'static str { - "arm64" + "aarch64" } fn triple(&self) -> Triple { - FromStr::from_str("arm64").unwrap() + self.triple.clone() } fn flags(&self) -> &settings::Flags { @@ -84,32 +83,28 @@ impl MachBackend for Arm64Backend { /// Create a new `isa::Builder`. 
pub fn isa_builder(triple: Triple) -> IsaBuilder { + assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64)); IsaBuilder { triple, setup: settings::builder(), - constructor: isa_constructor, + constructor: |triple, shared_flags, _| { + let backend = AArch64Backend::new_with_flags(triple, shared_flags); + Box::new(TargetIsaAdapter::new(backend)) + }, } } -fn isa_constructor( - _: Triple, - shared_flags: settings::Flags, - _arch_flag_builder: settings::Builder, -) -> Box { - let backend = Arm64Backend::new_with_flags(shared_flags); - Box::new(TargetIsaAdapter::new(backend)) -} - #[cfg(test)] mod test { use super::*; - use crate::binemit::{NullRelocSink, NullStackmapSink, NullTrapSink}; use crate::cursor::{Cursor, FuncCursor}; use crate::ir::types::*; use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature}; use crate::isa::CallConv; use crate::settings; use crate::settings::Configurable; + use core::str::FromStr; + use target_lexicon::Triple; #[test] fn test_compile_function() { @@ -130,8 +125,11 @@ mod test { let mut shared_flags = settings::builder(); shared_flags.set("opt_level", "none").unwrap(); - let backend = Arm64Backend::new_with_flags(settings::Flags::new(shared_flags)); - let sections = backend.compile_function(func, false).unwrap().sections; + let backend = AArch64Backend::new_with_flags( + Triple::from_str("aarch64").unwrap(), + settings::Flags::new(shared_flags), + ); + let sections = backend.compile_function(&mut func, false).unwrap().sections; let code = §ions.sections[0].data; // stp x29, x30, [sp, #-16]! @@ -182,9 +180,12 @@ mod test { let mut shared_flags = settings::builder(); shared_flags.set("opt_level", "none").unwrap(); - let backend = Arm64Backend::new_with_flags(settings::Flags::new(shared_flags)); + let backend = AArch64Backend::new_with_flags( + Triple::from_str("aarch64").unwrap(), + settings::Flags::new(shared_flags), + ); let result = backend - .compile_function(func, /* want_disasm = */ false) + .compile_function(&mut func, /* want_disasm = */ false) .unwrap(); let code = &result.sections.sections[0].data; diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index a0a2a5de87..c07082836f 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -84,7 +84,7 @@ pub mod fde; mod arm32; #[cfg(feature = "arm64")] -mod arm64; +mod aarch64; mod call_conv; mod constraints; @@ -93,6 +93,9 @@ mod encoding; pub mod registers; mod stack; +#[cfg(test)] +mod test_utils; + /// Returns a builder that can create a corresponding `TargetIsa` /// or `Err(LookupError::SupportDisabled)` if not enabled. macro_rules! isa_builder { @@ -117,7 +120,7 @@ pub fn lookup(triple: Triple) -> Result { isa_builder!(x86, "x86", triple) } Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple), - Architecture::Aarch64 { .. } => isa_builder!(arm64, "arm64", triple), + Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple), _ => Err(LookupError::Unsupported), } } diff --git a/cranelift/codegen/src/isa/test_utils.rs b/cranelift/codegen/src/isa/test_utils.rs index 826fabf949..c7802b052a 100644 --- a/cranelift/codegen/src/isa/test_utils.rs +++ b/cranelift/codegen/src/isa/test_utils.rs @@ -1,10 +1,13 @@ +// This is unused when no platforms with the new backend are enabled. 
+#![allow(dead_code)] + use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc}; use crate::ir::Value; use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode}; use crate::isa::TargetIsa; use alloc::vec::Vec; -use std::string::{String, ToString}; +use std::string::String; pub struct TestCodeSink { bytes: Vec, @@ -16,11 +19,13 @@ impl TestCodeSink { TestCodeSink { bytes: vec![] } } - /// This is pretty lame, but whatever .. + /// Return the code emitted to this sink as a hex string. pub fn stringify(&self) -> String { - let mut s = "".to_string(); + // This is pretty lame, but whatever .. + use std::fmt::Write; + let mut s = String::with_capacity(self.bytes.len() * 2); for b in &self.bytes { - s = s + &format!("{:02X}", b).to_string(); + write!(&mut s, "{:02X}", b).unwrap(); } s } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 2d6651a67e..d87bbf26b8 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -87,6 +87,7 @@ mod context; mod dce; mod divconst_magic_numbers; mod fx; +mod inst_predicates; mod iterators; mod legalizer; mod licm; diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 7aaa66fe14..11a96c58b2 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -1,15 +1,17 @@ //! ABI definitions. -use crate::ir; use crate::ir::StackSlot; use crate::machinst::*; use crate::settings; -use regalloc::{Reg, Set, SpillSlot, VirtualReg, Writable}; +use regalloc::{Reg, Set, SpillSlot, Writable}; /// Trait implemented by an object that tracks ABI-related state (e.g., stack /// layout) and can generate code while emitting the *body* of a function. -pub trait ABIBody { +pub trait ABIBody { + /// The instruction type for the ISA associated with this ABI. + type I: VCodeInst; + /// Get the liveins of the function. fn liveins(&self) -> Set; @@ -27,17 +29,19 @@ pub trait ABIBody { /// Generate an instruction which copies an argument to a destination /// register. - fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> I; + fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> Self::I; /// Generate an instruction which copies a source register to a return /// value slot. - fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> I; + fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> Self::I; /// Generate a return instruction. - fn gen_ret(&self) -> I; + fn gen_ret(&self) -> Self::I; - /// Generate an epilogue placeholder. - fn gen_epilogue_placeholder(&self) -> I; + /// Generate an epilogue placeholder. The returned instruction should return `true` from + /// `is_epilogue_placeholder()`; this is used to indicate to the lowering driver when + /// the epilogue should be inserted. + fn gen_epilogue_placeholder(&self) -> Self::I; // ----------------------------------------------------------------- // Every function above this line may only be called pre-regalloc. @@ -56,32 +60,32 @@ pub trait ABIBody { fn load_stackslot( &self, slot: StackSlot, - offset: usize, + offset: u32, ty: Type, into_reg: Writable, - ) -> I; + ) -> Self::I; /// Store to a stackslot. - fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> I; + fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Self::I; /// Load from a spillslot. 
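The change to `ABIBody` (and to `ABICall` below) is a generic-parameter-to-associated-type refactor: the instruction type is named once per implementation instead of being threaded through every use of the trait. A minimal self-contained illustration of the pattern, with made-up types rather than the real Cranelift signatures:

```rust
// Toy illustration only; `AbiBodyOld`, `AbiBodyNew`, `MyAbi` and `MyInst` are
// invented names, not the real Cranelift types.
trait AbiBodyOld<I> {
    fn gen_ret(&self) -> I;
}

trait AbiBodyNew {
    /// The instruction type for the ISA associated with this ABI.
    type I;
    fn gen_ret(&self) -> Self::I;
}

struct MyInst;
struct MyAbi;

impl AbiBodyNew for MyAbi {
    type I = MyInst;
    fn gen_ret(&self) -> MyInst {
        MyInst
    }
}

// A caller that used to hold `Box<dyn AbiBodyOld<MyInst>>` now holds
// `Box<dyn AbiBodyNew<I = MyInst>>`, and generic code can refer to `A::I`
// instead of carrying a second type parameter everywhere.
fn emit_ret<A: AbiBodyNew>(abi: &A) -> A::I {
    abi.gen_ret()
}
```

The patch applies the same move to `ABICall` below and to `LowerCtx` in `machinst/lower.rs`.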
- fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> I; + fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Self::I; /// Store to a spillslot. - fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> I; + fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I; /// Generate a prologue, post-regalloc. This should include any stack /// frame or other setup necessary to use the other methods (`load_arg`, - /// `store_retval`, and spillslot accesses.) |self| is mutable so that we + /// `store_retval`, and spillslot accesses.) `self` is mutable so that we /// can store information in it which will be useful when creating the /// epilogue. - fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec; + fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec; /// Generate an epilogue, post-regalloc. Note that this must generate the /// actual return instruction (rather than emitting this in the lowering /// logic), because the epilogue code comes before the return and the two are /// likely closely related. - fn gen_epilogue(&self, flags: &settings::Flags) -> Vec; + fn gen_epilogue(&self, flags: &settings::Flags) -> Vec; /// Returns the full frame size for the given function, after prologue emission has run. This /// comprises the spill space, incoming argument space, alignment padding, etc. @@ -91,10 +95,10 @@ pub trait ABIBody { fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32; /// Generate a spill. - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> I; + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Self::I; /// Generate a reload (fill). - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> I; + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> Self::I; } /// Trait implemented by an object that tracks ABI-related state and can @@ -111,22 +115,25 @@ pub trait ABIBody { /// and retval copies, and attach the register use/def info to the call. /// /// This trait is thus provided for convenience to the backends. -pub trait ABICall { +pub trait ABICall { + /// The instruction type for the ISA associated with this ABI. + type I: VCodeInst; + /// Get the number of arguments expected. fn num_args(&self) -> usize; /// Save the clobbered registers. /// Copy an argument value from a source register, prior to the call. - fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> I; + fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Self::I; /// Copy a return value into a destination register, after the call returns. - fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable) -> I; + fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable) -> Self::I; /// Pre-adjust the stack, prior to argument copies and call. - fn gen_stack_pre_adjust(&self) -> Vec; + fn gen_stack_pre_adjust(&self) -> Vec; /// Post-adjust the satck, after call return and return-value copies. - fn gen_stack_post_adjust(&self) -> Vec; + fn gen_stack_post_adjust(&self) -> Vec; /// Generate the call itself. /// @@ -138,5 +145,5 @@ pub trait ABICall { /// registers are also logically defs, but should never be read; their /// values are "defined" (to the regalloc) but "undefined" in every other /// sense.) 
- fn gen_call(&self) -> Vec; + fn gen_call(&self) -> Vec; } diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs index 3f7c5b7b57..c9cf41f359 100644 --- a/cranelift/codegen/src/machinst/adapter.rs +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -4,9 +4,12 @@ use crate::binemit; use crate::ir; use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa}; use crate::machinst::*; -use crate::regalloc::{RegDiversions, RegisterSet}; +use crate::regalloc::RegisterSet; use crate::settings::Flags; +#[cfg(feature = "testing_hooks")] +use crate::regalloc::RegDiversions; + use std::borrow::Cow; use std::fmt; use target_lexicon::Triple; @@ -30,7 +33,11 @@ impl TargetIsaAdapter { impl fmt::Display for TargetIsaAdapter { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "MachBackend") + f.debug_struct("MachBackend") + .field("name", &self.backend.name()) + .field("triple", &self.backend.triple()) + .field("flags", &format!("{}", self.backend.flags())) + .finish() } } diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index bfd4bf665a..847f2a6b66 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -1,6 +1,7 @@ //! Computation of basic block order in emitted code. use crate::machinst::*; +use regalloc::{BlockIx, Function}; /// Simple reverse postorder-based block order emission. /// @@ -29,9 +30,8 @@ impl BlockRPO { } } - let (start, end) = &vcode.block_ranges[block as usize]; - for i in *start..*end { - if vcode.insts[i as usize].is_epilogue_placeholder() { + for i in vcode.block_insns(BlockIx::new(block)) { + if vcode.get_insn(i).is_epilogue_placeholder() { debug_assert!(self.deferred_last.is_none()); self.deferred_last = Some(block); return; diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 458db9ea36..eda3955f88 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -7,14 +7,13 @@ use crate::timing; use log::debug; use regalloc::{allocate_registers, RegAllocAlgorithm}; -use std::env; /// Compile the given function down to VCode with allocated registers, ready /// for binary emission. pub fn compile( - f: &mut Function, + f: &Function, b: &B, - abi: Box>, + abi: Box>, flags: &settings::Flags, ) -> VCode where @@ -28,18 +27,8 @@ where debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe))); // Perform register allocation. - let algorithm = match env::var("REGALLOC") { - Ok(str) => match str.as_str() { - "lsrac" => RegAllocAlgorithm::LinearScanChecked, - "lsra" => RegAllocAlgorithm::LinearScan, - // to wit: btc doesn't mean "bitcoin" here - "btc" => RegAllocAlgorithm::BacktrackingChecked, - _ => RegAllocAlgorithm::Backtracking, - }, - // By default use backtracking, which is the fastest. - Err(_) => RegAllocAlgorithm::Backtracking, - }; - + // TODO: select register allocation algorithm from flags. 
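The `blockorder.rs` hunk above switches from poking at `VCode`'s `insts`/`block_ranges` fields (which become private in `vcode.rs` below) to the accessors that `VCode` already exposes through regalloc's `Function` trait. Roughly, the access pattern is as follows; this is a sketch, `block_has_epilogue_placeholder` is an invented helper, and the `VCode<I>` generic parameter is reconstructed since this rendering of the patch has stripped the angle-bracketed bounds:

```rust
// Sketch only: scanning a block's instructions via the regalloc::Function
// interface rather than VCode's now-private fields.
use crate::machinst::{BlockIndex, MachInst, VCode, VCodeInst};
use regalloc::{BlockIx, Function as RegallocFunction};

fn block_has_epilogue_placeholder<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> bool {
    for iix in vcode.block_insns(BlockIx::new(block)) {
        if vcode.get_insn(iix).is_epilogue_placeholder() {
            return true;
        }
    }
    false
}
```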
+ let algorithm = RegAllocAlgorithm::Backtracking; let result = { let _tt = timing::regalloc(); allocate_registers( @@ -70,7 +59,5 @@ where vcode.show_rru(Some(universe)) ); - //println!("{}\n", vcode.show_rru(Some(&B::MInst::reg_universe()))); - vcode } diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 2165416ebc..0d8fb1ff0e 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -2,39 +2,37 @@ //! to machine instructions with virtual registers. This is *almost* the final //! machine code, except for register allocation. -use crate::binemit::CodeSink; -use crate::dce::has_side_effect; use crate::entity::SecondaryMap; +use crate::inst_predicates::has_side_effect; +use crate::ir::instructions::BranchInfo; use crate::ir::{ Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef, }; -use crate::isa::registers::RegUnit; -use crate::machinst::{ - ABIBody, BlockIndex, MachInst, MachInstEmit, VCode, VCodeBuilder, VCodeInst, -}; +use crate::machinst::{ABIBody, BlockIndex, VCode, VCodeBuilder, VCodeInst}; use crate::num_uses::NumUses; -use regalloc::Function as RegallocFunction; -use regalloc::{RealReg, Reg, RegClass, Set, VirtualReg, Writable}; +use regalloc::{Reg, RegClass, Set, VirtualReg, Writable}; use alloc::boxed::Box; use alloc::vec::Vec; use log::debug; use smallvec::SmallVec; use std::collections::VecDeque; -use std::ops::Range; /// A context that machine-specific lowering code can use to emit lowered instructions. This is the /// view of the machine-independent per-function lowering context that is seen by the machine /// backend. -pub trait LowerCtx { +pub trait LowerCtx { + /// The instruction type for which this lowering framework is instantiated. + type I; + /// Get the instdata for a given IR instruction. fn data(&self, ir_inst: Inst) -> &InstructionData; /// Get the controlling type for a polymorphic IR instruction. fn ty(&self, ir_inst: Inst) -> Type; /// Emit a machine instruction. - fn emit(&mut self, mach_inst: I); + fn emit(&mut self, mach_inst: Self::I); /// Indicate that an IR instruction has been merged, and so one of its /// uses is gone (replaced by uses of the instruction's inputs). This /// helps the lowering algorithm to perform on-the-fly DCE, skipping over @@ -87,11 +85,11 @@ pub trait LowerBackend { /// Lower a single instruction. Instructions are lowered in reverse order. /// This function need not handle branches; those are always passed to /// `lower_branch_group` below. - fn lower>(&self, ctx: &mut C, inst: Inst); + fn lower>(&self, ctx: &mut C, inst: Inst); /// Lower a block-terminating group of branches (which together can be seen as one /// N-way branch), given a vcode BlockIndex for each target. - fn lower_branch_group>( + fn lower_branch_group>( &self, ctx: &mut C, insts: &[Inst], @@ -103,22 +101,22 @@ pub trait LowerBackend { /// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence /// from original Inst to MachInsts. pub struct Lower<'a, I: VCodeInst> { - // The function to lower. + /// The function to lower. f: &'a Function, - // Lowered machine instructions. + /// Lowered machine instructions. vcode: VCodeBuilder, - // Number of active uses (minus `dec_use()` calls by backend) of each instruction. + /// Number of active uses (minus `dec_use()` calls by backend) of each instruction. 
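`LowerCtx` and `LowerBackend` get the same associated-type treatment as `ABIBody`; unfortunately the generic bounds in this rendering of the patch have been garbled by angle-bracket stripping. Approximately, the new shapes are as follows (a reconstruction with abbreviated method lists, not a verbatim copy of the source, so the exact bounds may differ slightly):

```rust
// Approximate reconstruction; treat the details as a sketch.
use crate::ir::{Inst, InstructionData, Type};
use crate::machinst::VCodeInst;

pub trait LowerCtx {
    /// The instruction type for which this lowering framework is instantiated.
    type I;

    fn data(&self, ir_inst: Inst) -> &InstructionData;
    fn ty(&self, ir_inst: Inst) -> Type;
    fn emit(&mut self, mach_inst: Self::I);
    // ...the remaining methods are unchanged apart from `I` becoming `Self::I`.
}

pub trait LowerBackend {
    /// The machine instruction type produced by this backend.
    type MInst: VCodeInst;

    /// Backends now pin the context's instruction type through the associated
    /// type instead of taking it as a free generic parameter.
    fn lower<C: LowerCtx<I = Self::MInst>>(&self, ctx: &mut C, inst: Inst);
}
```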
num_uses: SecondaryMap, - // Mapping from `Value` (SSA value in IR) to virtual register. + /// Mapping from `Value` (SSA value in IR) to virtual register. value_regs: SecondaryMap, - // Return-value vregs. + /// Return-value vregs. retval_regs: Vec, - // Next virtual register number to allocate. + /// Next virtual register number to allocate. next_vreg: u32, } @@ -144,7 +142,7 @@ enum GenerateReturn { impl<'a, I: VCodeInst> Lower<'a, I> { /// Prepare a new lowering context for the given IR function. - pub fn new(f: &'a Function, abi: Box>) -> Lower<'a, I> { + pub fn new(f: &'a Function, abi: Box>) -> Lower<'a, I> { let mut vcode = VCodeBuilder::new(abi); let num_uses = NumUses::compute(f).take_uses(); @@ -244,7 +242,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> { let mut succs: SmallVec<[Block; 16]> = SmallVec::new(); for inst in self.f.layout.block_insts(b) { if self.f.dfg[inst].opcode().is_branch() { - succs.extend(branch_targets(self.f, b, inst).into_iter()); + visit_branch_targets(self.f, b, inst, |succ| { + succs.push(succ); + }); } } for succ in succs.into_iter() { @@ -264,17 +264,14 @@ impl<'a, I: VCodeInst> Lower<'a, I> { /// Lower the function. pub fn lower>(mut self, backend: &B) -> VCode { // Find all reachable blocks. - let mut bbs = self.find_reachable_bbs(); - // Work backward (reverse block order, reverse through each block), skipping insns with zero - // uses. - bbs.reverse(); + let bbs = self.find_reachable_bbs(); // This records a Block-to-BlockIndex map so that branch targets can be resolved. let mut next_bindex = self.vcode.init_bb_map(&bbs[..]); // Allocate a separate BlockIndex for each control-flow instruction so that we can create // the edge blocks later. Each entry for a control-flow inst is the edge block; the list - // has (cf-inst, edge block, orig block) tuples. + // has (control flow inst, edge block, orig block) tuples. let mut edge_blocks_by_inst: SecondaryMap> = SecondaryMap::with_default(vec![]); let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![]; @@ -282,7 +279,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> { debug!("about to lower function: {:?}", self.f); debug!("bb map: {:?}", self.vcode.blocks_by_bb()); - for bb in bbs.iter() { + // Work backward (reverse block order, reverse through each block), skipping insns with zero + // uses. + for bb in bbs.iter().rev() { for inst in self.f.layout.block_insts(*bb) { let op = self.f.dfg[inst].opcode(); if op.is_branch() { @@ -293,9 +292,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> { edge_blocks_by_inst[inst].push(edge_block); edge_blocks.push((inst, edge_block, next_bb)); }; - for succ in branch_targets(self.f, *bb, inst).into_iter() { + visit_branch_targets(self.f, *bb, inst, |succ| { add_succ(succ); - } + }); } } } @@ -303,7 +302,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> { for bb in bbs.iter() { debug!("lowering bb: {}", bb); - // If this is a return block, produce the return value setup. + // If this is a return block, produce the return value setup. N.B.: this comes + // *before* the below because it must occur *after* any other instructions, and + // instructions are lowered in reverse order. let last_insn = self.f.layout.block_insts(*bb).last().unwrap(); let last_insn_opcode = self.f.dfg[last_insn].opcode(); if last_insn_opcode.is_return() { @@ -513,7 +514,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> { } } -impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> { +impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> { + type I = I; + /// Get the instdata for a given IR instruction. 
fn data(&self, ir_inst: Inst) -> &InstructionData { &self.f.dfg[ir_inst] @@ -695,29 +698,23 @@ impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> { } } -fn branch_targets(f: &Function, block: Block, inst: Inst) -> SmallVec<[Block; 16]> { - let mut ret = SmallVec::new(); +fn visit_branch_targets(f: &Function, block: Block, inst: Inst, mut visit: F) { if f.dfg[inst].opcode() == Opcode::Fallthrough { - ret.push(f.layout.next_block(block).unwrap()); + visit(f.layout.next_block(block).unwrap()); } else { - match &f.dfg[inst] { - &InstructionData::Jump { destination, .. } - | &InstructionData::Branch { destination, .. } - | &InstructionData::BranchInt { destination, .. } - | &InstructionData::BranchIcmp { destination, .. } - | &InstructionData::BranchFloat { destination, .. } => { - ret.push(destination); + match f.dfg[inst].analyze_branch(&f.dfg.value_lists) { + BranchInfo::NotABranch => {} + BranchInfo::SingleDest(dest, _) => { + visit(dest); } - &InstructionData::BranchTable { - destination, table, .. - } => { - ret.push(destination); - for dest in f.jump_tables[table].as_slice() { - ret.push(*dest); + BranchInfo::Table(table, maybe_dest) => { + if let Some(dest) = maybe_dest { + visit(dest); + } + for &dest in f.jump_tables[table].as_slice() { + visit(dest); } } - _ => {} } } - ret } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 93c9126b32..844d0d1a4f 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -17,105 +17,97 @@ //! (N.B.: though we show the VCode separately at each stage, the passes //! mutate the VCode in place; these are not separate copies of the code.) //! -//! | ir::Function (SSA IR, machine-independent opcodes) -//! | | -//! | | [lower] -//! | | -//! | VCode (machine instructions: -//! | | - mostly virtual registers. -//! | | - cond branches in two-target form. -//! | | - branch targets are block indices. -//! | | - in-memory constants held by insns, -//! | | with unknown offsets. -//! | | - critical edges (actually all edges) -//! | | are split.) -//! | | [regalloc] -//! | | -//! | VCode (machine instructions: -//! | | - all real registers. -//! | | - new instruction sequence returned -//! | | out-of-band in RegAllocResult. -//! | | - instruction sequence has spills, -//! | | reloads, and moves inserted. -//! | | - other invariants same as above.) -//! | | -//! | | [preamble/postamble] -//! | | -//! | VCode (machine instructions: -//! | | - stack-frame size known. -//! | | - out-of-band instruction sequence -//! | | has preamble prepended to entry -//! | | block, and postamble injected before -//! | | every return instruction. -//! | | - all symbolic stack references to -//! | | stackslots and spillslots are resolved -//! | | to concrete FP-offset mem addresses.) -//! | | [block/insn ordering] -//! | | -//! | VCode (machine instructions: -//! | | - vcode.final_block_order is filled in. -//! | | - new insn sequence from regalloc is -//! | | placed back into vcode and block -//! | | boundaries are updated.) -//! | | [redundant branch/block -//! | | removal] -//! | | -//! | VCode (machine instructions: -//! | | - all blocks that were just an -//! | | unconditional branch are removed.) -//! | | -//! | | [branch finalization -//! | | (fallthroughs)] -//! | | -//! | VCode (machine instructions: -//! | | - all branches are in lowered one- -//! | | target form, but targets are still -//! | | block indices.) -//! | | -//! | | [branch finalization -//! | | (offsets)] -//! | | -//! 
| VCode (machine instructions: -//! | | - all branch offsets from start of -//! | | function are known, and all branches -//! | | have resolved-offset targets.) -//! | | -//! | | [MemArg finalization] -//! | | -//! | VCode (machine instructions: -//! | | - all MemArg references to the constant -//! | | pool are replaced with offsets. -//! | | - all constant-pool data is collected -//! | | in the VCode.) -//! | | -//! | | [binary emission] -//! | | -//! | Vec (machine code!) -//! | +//! ```plain +//! +//! ir::Function (SSA IR, machine-independent opcodes) +//! | +//! | [lower] +//! | +//! VCode (machine instructions: +//! | - mostly virtual registers. +//! | - cond branches in two-target form. +//! | - branch targets are block indices. +//! | - in-memory constants held by insns, +//! | with unknown offsets. +//! | - critical edges (actually all edges) +//! | are split.) +//! | [regalloc] +//! | +//! VCode (machine instructions: +//! | - all real registers. +//! | - new instruction sequence returned +//! | out-of-band in RegAllocResult. +//! | - instruction sequence has spills, +//! | reloads, and moves inserted. +//! | - other invariants same as above.) +//! | +//! | [preamble/postamble] +//! | +//! VCode (machine instructions: +//! | - stack-frame size known. +//! | - out-of-band instruction sequence +//! | has preamble prepended to entry +//! | block, and postamble injected before +//! | every return instruction. +//! | - all symbolic stack references to +//! | stackslots and spillslots are resolved +//! | to concrete FP-offset mem addresses.) +//! | [block/insn ordering] +//! | +//! VCode (machine instructions: +//! | - vcode.final_block_order is filled in. +//! | - new insn sequence from regalloc is +//! | placed back into vcode and block +//! | boundaries are updated.) +//! | [redundant branch/block +//! | removal] +//! | +//! VCode (machine instructions: +//! | - all blocks that were just an +//! | unconditional branch are removed.) +//! | +//! | [branch finalization +//! | (fallthroughs)] +//! | +//! VCode (machine instructions: +//! | - all branches are in lowered one- +//! | target form, but targets are still +//! | block indices.) +//! | +//! | [branch finalization +//! | (offsets)] +//! | +//! VCode (machine instructions: +//! | - all branch offsets from start of +//! | function are known, and all branches +//! | have resolved-offset targets.) +//! | +//! | [MemArg finalization] +//! | +//! VCode (machine instructions: +//! | - all MemArg references to the constant +//! | pool are replaced with offsets. +//! | - all constant-pool data is collected +//! | in the VCode.) +//! | +//! | [binary emission] +//! | +//! Vec (machine code!) +//! +//! 
``` -#![allow(unused_imports)] - -use crate::binemit::{ - CodeInfo, CodeOffset, CodeSink, MemoryCodeSink, RelocSink, StackmapSink, TrapSink, -}; -use crate::entity::EntityRef; +use crate::binemit::{CodeInfo, CodeOffset}; use crate::entity::SecondaryMap; use crate::ir::condcodes::IntCC; -use crate::ir::ValueLocations; -use crate::ir::{DataFlowGraph, Function, Inst, Opcode, Type, Value}; -use crate::isa::RegUnit; +use crate::ir::{Function, Type}; use crate::result::CodegenResult; use crate::settings::Flags; -use crate::HashMap; use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt::Debug; -use core::iter::Sum; use regalloc::Map as RegallocMap; use regalloc::RegUsageCollector; use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; -use smallvec::SmallVec; -use std::hash::Hash; use std::string::String; use target_lexicon::Triple; @@ -129,8 +121,8 @@ pub mod blockorder; pub use blockorder::*; pub mod abi; pub use abi::*; -pub mod pp; -pub use pp::*; +pub mod pretty_print; +pub use pretty_print::*; pub mod sections; pub use sections::*; pub mod adapter; @@ -255,10 +247,10 @@ impl MachCompileResult { /// Top-level machine backend trait, which wraps all monomorphized code and /// allows a virtual call from the machine-independent `Function::compile()`. pub trait MachBackend { - /// Compile the given function. Consumes the function. + /// Compile the given function. fn compile_function( &self, - func: Function, + func: &Function, want_disasm: bool, ) -> CodegenResult; diff --git a/cranelift/codegen/src/machinst/pp.rs b/cranelift/codegen/src/machinst/pretty_print.rs similarity index 100% rename from cranelift/codegen/src/machinst/pp.rs rename to cranelift/codegen/src/machinst/pretty_print.rs diff --git a/cranelift/codegen/src/machinst/sections.rs b/cranelift/codegen/src/machinst/sections.rs index 3e387239d0..247adf5cef 100644 --- a/cranelift/codegen/src/machinst/sections.rs +++ b/cranelift/codegen/src/machinst/sections.rs @@ -3,7 +3,7 @@ //! simultaneously, so we buffer the result in memory and hand off to the //! caller at the end of compilation. -use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, RelocSink, StackmapSink, TrapSink}; +use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc}; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; use alloc::vec::Vec; @@ -104,28 +104,31 @@ pub trait MachSectionOutput { /// Add 2 bytes to the section. fn put2(&mut self, value: u16) { - self.put1((value & 0xff) as u8); - self.put1(((value >> 8) & 0xff) as u8); + let [b0, b1] = value.to_le_bytes(); + self.put1(b0); + self.put1(b1); } /// Add 4 bytes to the section. fn put4(&mut self, value: u32) { - self.put1((value & 0xff) as u8); - self.put1(((value >> 8) & 0xff) as u8); - self.put1(((value >> 16) & 0xff) as u8); - self.put1(((value >> 24) & 0xff) as u8); + let [b0, b1, b2, b3] = value.to_le_bytes(); + self.put1(b0); + self.put1(b1); + self.put1(b2); + self.put1(b3); } /// Add 8 bytes to the section. 
fn put8(&mut self, value: u64) { - self.put1((value & 0xff) as u8); - self.put1(((value >> 8) & 0xff) as u8); - self.put1(((value >> 16) & 0xff) as u8); - self.put1(((value >> 24) & 0xff) as u8); - self.put1(((value >> 32) & 0xff) as u8); - self.put1(((value >> 40) & 0xff) as u8); - self.put1(((value >> 48) & 0xff) as u8); - self.put1(((value >> 56) & 0xff) as u8); + let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes(); + self.put1(b0); + self.put1(b1); + self.put1(b2); + self.put1(b3); + self.put1(b4); + self.put1(b5); + self.put1(b6); + self.put1(b7); } /// Add a slice of bytes to the section. diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 64b1a4012a..6e3adea53a 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -17,7 +17,6 @@ //! See the main module comment in `mod.rs` for more details on the VCode-based //! backend pipeline. -use crate::binemit::Reloc; use crate::ir; use crate::machinst::*; use crate::settings; @@ -32,7 +31,6 @@ use log::debug; use smallvec::SmallVec; use std::fmt; use std::iter; -use std::ops::Index; use std::string::String; /// Index referring to an instruction in VCode. @@ -59,13 +57,13 @@ pub struct VCode { vreg_types: Vec, /// Lowered machine instructions in order corresponding to the original IR. - pub insts: Vec, + insts: Vec, /// Entry block. entry: BlockIndex, /// Block instruction indices. - pub block_ranges: Vec<(InsnIndex, InsnIndex)>, + block_ranges: Vec<(InsnIndex, InsnIndex)>, /// Block successors: index range in the successor-list below. block_succ_range: Vec<(usize, usize)>, @@ -94,7 +92,7 @@ pub struct VCode { code_size: CodeOffset, /// ABI object. - abi: Box>, + abi: Box>, } /// A builder for a VCode function body. This builder is designed for the @@ -128,7 +126,7 @@ pub struct VCodeBuilder { impl VCodeBuilder { /// Create a new VCodeBuilder. - pub fn new(abi: Box>) -> VCodeBuilder { + pub fn new(abi: Box>) -> VCodeBuilder { let vcode = VCode::new(abi); VCodeBuilder { vcode, @@ -139,7 +137,7 @@ impl VCodeBuilder { } /// Access the ABI object. - pub fn abi(&mut self) -> &mut dyn ABIBody { + pub fn abi(&mut self) -> &mut dyn ABIBody { &mut *self.vcode.abi } @@ -282,7 +280,7 @@ fn is_trivial_jump_block(vcode: &VCode, block: BlockIndex) -> O impl VCode { /// New empty VCode. - fn new(abi: Box>) -> VCode { + fn new(abi: Box>) -> VCode { VCode { liveins: abi.liveins(), liveouts: abi.liveouts(), @@ -472,10 +470,10 @@ impl VCode { // Compute block offsets. let mut code_section = MachSectionSize::new(0); let mut block_offsets = vec![0; self.num_blocks()]; - for block in &self.final_block_order { + for &block in &self.final_block_order { code_section.offset = I::align_basic_block(code_section.offset); - block_offsets[*block as usize] = code_section.offset; - let (start, end) = self.block_ranges[*block as usize]; + block_offsets[block as usize] = code_section.offset; + let (start, end) = self.block_ranges[block as usize]; for iix in start..end { self.insts[iix as usize].emit(&mut code_section); } @@ -490,9 +488,9 @@ impl VCode { // it (so forward references are now possible), and (ii) mutates the // instructions. 
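The `put2`/`put4`/`put8` rewrites above lean on the standard library for byte order: `to_le_bytes` yields the bytes least-significant first, which is exactly what the old shift-and-mask code produced. A tiny standalone check of that equivalence (illustration only):

```rust
// to_le_bytes() emits the least-significant byte first, matching the old
// `(value >> (8 * n)) & 0xff` sequence.
fn main() {
    let value: u32 = 0x1234_5678;
    assert_eq!(value.to_le_bytes(), [0x78, 0x56, 0x34, 0x12]);
    assert_eq!((value & 0xff) as u8, 0x78);
}
```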
let mut code_section = MachSectionSize::new(0); - for block in &self.final_block_order { + for &block in &self.final_block_order { code_section.offset = I::align_basic_block(code_section.offset); - let (start, end) = self.block_ranges[*block as usize]; + let (start, end) = self.block_ranges[block as usize]; for iix in start..end { self.insts[iix as usize] .with_block_offsets(code_section.offset, &self.final_block_offsets[..]); @@ -510,7 +508,7 @@ impl VCode { let code_idx = sections.add_section(0, self.code_size); let code_section = sections.get_section(code_idx); - for block in &self.final_block_order { + for &block in &self.final_block_order { let new_offset = I::align_basic_block(code_section.cur_offset_from_start()); while new_offset > code_section.cur_offset_from_start() { // Pad with NOPs up to the aligned block offset. @@ -519,7 +517,7 @@ impl VCode { } assert_eq!(code_section.cur_offset_from_start(), new_offset); - let (start, end) = self.block_ranges[*block as usize]; + let (start, end) = self.block_ranges[block as usize]; for iix in start..end { self.insts[iix as usize].emit(code_section); } @@ -639,9 +637,6 @@ impl RegallocFunction for VCode { } } -// N.B.: Debug impl assumes that VCode has already been through all compilation -// passes, and so has a final block order and offsets. - impl fmt::Debug for VCode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { writeln!(f, "VCode_Debug {{")?; @@ -665,22 +660,21 @@ impl fmt::Debug for VCode { } } -// Pretty-printing with `RealRegUniverse` context. +/// Pretty-printing with `RealRegUniverse` context. impl ShowWithRRU for VCode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - use crate::alloc::string::ToString; use std::fmt::Write; // Calculate an order in which to display the blocks. This is the same // as final_block_order, but also includes blocks which are in the // representation but not in final_block_order. let mut display_order = Vec::::new(); - // First display blocks in |final_block_order| + // First display blocks in `final_block_order` for bix in &self.final_block_order { assert!((*bix as usize) < self.num_blocks()); display_order.push(*bix as usize); } - // Now also take care of those not listed in |final_block_order|. + // Now also take care of those not listed in `final_block_order`. // This is quadratic, but it's also debug-only code. 
for bix in 0..self.num_blocks() { if display_order.contains(&bix) { @@ -690,48 +684,46 @@ impl ShowWithRRU for VCode { } let mut s = String::new(); - s = s + &format!("VCode_ShowWithRRU {{{{"); - s = s + &"\n".to_string(); - s = s + &format!(" Entry block: {}", self.entry); - s = s + &"\n".to_string(); - s = s + &format!(" Final block order: {:?}", self.final_block_order); - s = s + &"\n".to_string(); + write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap(); + write!(&mut s, " Entry block: {}\n", self.entry).unwrap(); + write!( + &mut s, + " Final block order: {:?}\n", + self.final_block_order + ) + .unwrap(); for i in 0..self.num_blocks() { let block = display_order[i]; - let omitted = - (if !self.final_block_order.is_empty() && i >= self.final_block_order.len() { - "** OMITTED **" - } else { - "" - }) - .to_string(); + let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len() + { + "** OMITTED **" + } else { + "" + }; - s = s + &format!("Block {}: {}", block, omitted); - s = s + &"\n".to_string(); + write!(&mut s, "Block {}: {}\n", block, omitted).unwrap(); if let Some(bb) = self.bindex_to_bb(block as BlockIndex) { - s = s + &format!(" (original IR block: {})\n", bb); + write!(&mut s, " (original IR block: {})\n", bb).unwrap(); } for succ in self.succs(block as BlockIndex) { - s = s + &format!(" (successor: Block {})", succ); - s = s + &"\n".to_string(); + write!(&mut s, " (successor: Block {})\n", succ).unwrap(); } let (start, end) = self.block_ranges[block]; - s = s + &format!(" (instruction range: {} .. {})", start, end); - s = s + &"\n".to_string(); + write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap(); for inst in start..end { - s = s + &format!( - " Inst {}: {}", + write!( + &mut s, + " Inst {}: {}\n", inst, self.insts[inst as usize].show_rru(mb_rru) - ); - s = s + &"\n".to_string(); + ) + .unwrap(); } } - s = s + &format!("}}}}"); - s = s + &"\n".to_string(); + write!(&mut s, "}}}}\n").unwrap(); s } diff --git a/cranelift/codegen/src/num_uses.rs b/cranelift/codegen/src/num_uses.rs index c08741020c..fd6eee8ec1 100644 --- a/cranelift/codegen/src/num_uses.rs +++ b/cranelift/codegen/src/num_uses.rs @@ -1,15 +1,9 @@ //! A pass that computes the number of uses of any given instruction. -#![allow(dead_code)] -#![allow(unused_imports)] - -use crate::cursor::{Cursor, FuncCursor}; -use crate::dce::has_side_effect; use crate::entity::SecondaryMap; use crate::ir::dfg::ValueDef; -use crate::ir::instructions::InstructionData; use crate::ir::Value; -use crate::ir::{DataFlowGraph, Function, Inst, Opcode}; +use crate::ir::{DataFlowGraph, Function, Inst}; /// Auxiliary data structure that counts the number of uses of any given /// instruction in a Function. This is used during instruction selection @@ -51,16 +45,6 @@ impl NumUses { } } - /// How many times is an instruction used? - pub fn use_count(&self, i: Inst) -> usize { - self.uses[i] as usize - } - - /// Is an instruction used at all? - pub fn is_used(&self, i: Inst) -> bool { - self.use_count(i) > 0 - } - /// Take the complete uses map, consuming this analysis result. 
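Both the `TestCodeSink::stringify` change earlier and the `show_rru` rewrite above replace repeated `String` concatenation with `write!` into a single growing buffer. The pattern in isolation, as a standalone distillation (writing into a `String` cannot fail, hence the `unwrap`):

```rust
use std::fmt::Write;

/// Render a byte slice as uppercase hex without allocating a String per byte.
fn hex_string(bytes: &[u8]) -> String {
    let mut s = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        write!(&mut s, "{:02X}", b).unwrap();
    }
    s
}

fn main() {
    assert_eq!(hex_string(&[0xDE, 0xAD]), "DEAD");
}
```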
pub fn take_uses(self) -> SecondaryMap { self.uses diff --git a/cranelift/codegen/src/postopt.rs b/cranelift/codegen/src/postopt.rs index b6c36434a1..9e2179982d 100644 --- a/cranelift/codegen/src/postopt.rs +++ b/cranelift/codegen/src/postopt.rs @@ -364,19 +364,17 @@ pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) { while let Some(_block) = pos.next_block() { let mut last_flags_clobber = None; while let Some(inst) = pos.next_inst() { - if isa.uses_cpu_flags() { + if !is_mach_backend && isa.uses_cpu_flags() { // Optimize instructions to make use of flags. optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa); - if !is_mach_backend { - // Track the most recent seen instruction that clobbers the flags. - if let Some(constraints) = isa - .encoding_info() - .operand_constraints(pos.func.encodings[inst]) - { - if constraints.clobbers_flags { - last_flags_clobber = Some(inst) - } + // Track the most recent seen instruction that clobbers the flags. + if let Some(constraints) = isa + .encoding_info() + .operand_constraints(pos.func.encodings[inst]) + { + if constraints.clobbers_flags { + last_flags_clobber = Some(inst) } } } diff --git a/cranelift/codegen/src/verifier/flags.rs b/cranelift/codegen/src/verifier/flags.rs index 76e83ab88a..e4cfc80462 100644 --- a/cranelift/codegen/src/verifier/flags.rs +++ b/cranelift/codegen/src/verifier/flags.rs @@ -28,17 +28,18 @@ pub fn verify_flags( errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { let _tt = timing::verify_flags(); - if isa.is_none() || isa.unwrap().get_mach_backend().is_none() { - let mut verifier = FlagsVerifier { - func, - cfg, - encinfo: isa.map(|isa| isa.encoding_info()), - livein: SecondaryMap::new(), - }; - verifier.check(errors) + let encinfo = if isa.is_none() || isa.unwrap().get_mach_backend().is_some() { + None } else { - Ok(()) - } + Some(isa.unwrap().encoding_info()) + }; + let mut verifier = FlagsVerifier { + func, + cfg, + encinfo, + livein: SecondaryMap::new(), + }; + verifier.check(errors) } struct FlagsVerifier<'a> { diff --git a/cranelift/filetests/filetests/vcode/arm64/arithmetic.clif b/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif similarity index 99% rename from cranelift/filetests/filetests/vcode/arm64/arithmetic.clif rename to cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif index 7fbda32d08..1f6dcf6b82 100644 --- a/cranelift/filetests/filetests/vcode/arm64/arithmetic.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/vcode/arm64/basic1.clif b/cranelift/filetests/filetests/vcode/aarch64/basic1.clif similarity index 90% rename from cranelift/filetests/filetests/vcode/arm64/basic1.clif rename to cranelift/filetests/filetests/vcode/aarch64/basic1.clif index 29713d3427..b5ec1ae160 100644 --- a/cranelift/filetests/filetests/vcode/arm64/basic1.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/basic1.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/vcode/arm64/bitops.clif b/cranelift/filetests/filetests/vcode/aarch64/bitops.clif similarity index 99% rename from cranelift/filetests/filetests/vcode/arm64/bitops.clif rename to cranelift/filetests/filetests/vcode/aarch64/bitops.clif index f2ebc5f003..8f5e81d322 100644 --- 
a/cranelift/filetests/filetests/vcode/arm64/bitops.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/bitops.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %a(i32) -> i32 { block0(v0: i32): diff --git a/cranelift/filetests/filetests/vcode/arm64/call-indirect.clif b/cranelift/filetests/filetests/vcode/aarch64/call-indirect.clif similarity index 91% rename from cranelift/filetests/filetests/vcode/arm64/call-indirect.clif rename to cranelift/filetests/filetests/vcode/aarch64/call-indirect.clif index 84fa72d2db..c5e8ea0596 100644 --- a/cranelift/filetests/filetests/vcode/arm64/call-indirect.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/call-indirect.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i64, i64) -> i64 { sig0 = (i64) -> i64 diff --git a/cranelift/filetests/filetests/vcode/arm64/call.clif b/cranelift/filetests/filetests/vcode/aarch64/call.clif similarity index 90% rename from cranelift/filetests/filetests/vcode/arm64/call.clif rename to cranelift/filetests/filetests/vcode/aarch64/call.clif index 3210db3959..1429dceed6 100644 --- a/cranelift/filetests/filetests/vcode/arm64/call.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/call.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i64) -> i64 { fn0 = %g(i64) -> i64 diff --git a/cranelift/filetests/filetests/vcode/arm64/condbr.clif b/cranelift/filetests/filetests/vcode/aarch64/condbr.clif similarity index 94% rename from cranelift/filetests/filetests/vcode/arm64/condbr.clif rename to cranelift/filetests/filetests/vcode/aarch64/condbr.clif index e85e309ce5..596557d8e0 100644 --- a/cranelift/filetests/filetests/vcode/arm64/condbr.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/condbr.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -33,7 +34,7 @@ block2: ; nextln: mov fp, sp ; nextln: subs xzr, x0, x1 ; nextln: b.eq 20 -; check: Block 0: +; check: Block 2: ; check: movz x0, #2 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -58,7 +59,7 @@ block1: ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp ; nextln: subs xzr, x0, x1 -; check: Block 0: +; check: Block 1: ; check: movz x0, #1 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 diff --git a/cranelift/filetests/filetests/vcode/arm64/condops.clif b/cranelift/filetests/filetests/vcode/aarch64/condops.clif similarity index 96% rename from cranelift/filetests/filetests/vcode/arm64/condops.clif rename to cranelift/filetests/filetests/vcode/aarch64/condops.clif index 01d2637e88..e489836527 100644 --- a/cranelift/filetests/filetests/vcode/arm64/condops.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/condops.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i8, i64, i64) -> i64 { block0(v0: i8, v1: i64, v2: i64): diff --git a/cranelift/filetests/filetests/vcode/arm64/constants.clif b/cranelift/filetests/filetests/vcode/aarch64/constants.clif similarity index 99% rename from cranelift/filetests/filetests/vcode/arm64/constants.clif rename to cranelift/filetests/filetests/vcode/aarch64/constants.clif index 5eca5402d7..67667d59c1 100644 --- a/cranelift/filetests/filetests/vcode/arm64/constants.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/constants.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f() -> i64 { block0: diff --git a/cranelift/filetests/filetests/vcode/arm64/extend-op.clif b/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif similarity index 92% rename from cranelift/filetests/filetests/vcode/arm64/extend-op.clif rename to cranelift/filetests/filetests/vcode/aarch64/extend-op.clif index 74879c8c11..6194dd563f 100644 --- a/cranelift/filetests/filetests/vcode/arm64/extend-op.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i8) -> i64 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/vcode/arm64/jumptable.clif b/cranelift/filetests/filetests/vcode/aarch64/jumptable.clif similarity index 96% rename from cranelift/filetests/filetests/vcode/arm64/jumptable.clif rename to cranelift/filetests/filetests/vcode/aarch64/jumptable.clif index 0677c3cb7d..0789173acb 100644 --- a/cranelift/filetests/filetests/vcode/arm64/jumptable.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/jumptable.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i64) -> i64 { jt0 = jump_table [block1, block2, block3] diff --git a/cranelift/filetests/filetests/vcode/arm64/narrow-arithmetic.clif b/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif similarity index 98% rename from cranelift/filetests/filetests/vcode/arm64/narrow-arithmetic.clif rename to cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif index 345a527d88..d11fc22417 100644 --- a/cranelift/filetests/filetests/vcode/arm64/narrow-arithmetic.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %add8(i8, i8) -> i8 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/vcode/arm64/saturating-ops.clif b/cranelift/filetests/filetests/vcode/aarch64/saturating-ops.clif similarity index 96% rename from cranelift/filetests/filetests/vcode/arm64/saturating-ops.clif rename to cranelift/filetests/filetests/vcode/aarch64/saturating-ops.clif index a281a25e4b..60b45cc07a 100644 --- a/cranelift/filetests/filetests/vcode/arm64/saturating-ops.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/saturating-ops.clif @@ 
-1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %uaddsat64(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/vcode/arm64/shift-op.clif b/cranelift/filetests/filetests/vcode/aarch64/shift-op.clif similarity index 91% rename from cranelift/filetests/filetests/vcode/arm64/shift-op.clif rename to cranelift/filetests/filetests/vcode/aarch64/shift-op.clif index 852668081d..12984620a1 100644 --- a/cranelift/filetests/filetests/vcode/arm64/shift-op.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/shift-op.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/vcode/arm64/shift-rotate.clif b/cranelift/filetests/filetests/vcode/aarch64/shift-rotate.clif similarity index 99% rename from cranelift/filetests/filetests/vcode/arm64/shift-rotate.clif rename to cranelift/filetests/filetests/vcode/aarch64/shift-rotate.clif index bd56d4da5a..b865cc2902 100644 --- a/cranelift/filetests/filetests/vcode/arm64/shift-rotate.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/shift-rotate.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ROR, variable diff --git a/cranelift/filetests/filetests/vcode/arm64/symbol-value.clif b/cranelift/filetests/filetests/vcode/aarch64/symbol-value.clif similarity index 90% rename from cranelift/filetests/filetests/vcode/arm64/symbol-value.clif rename to cranelift/filetests/filetests/vcode/aarch64/symbol-value.clif index cf22b20ff9..01c0a8a46b 100644 --- a/cranelift/filetests/filetests/vcode/arm64/symbol-value.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/symbol-value.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f() -> i64 { gv0 = symbol %my_global diff --git a/cranelift/filetests/filetests/vcode/arm64/traps.clif b/cranelift/filetests/filetests/vcode/aarch64/traps.clif similarity index 91% rename from cranelift/filetests/filetests/vcode/arm64/traps.clif rename to cranelift/filetests/filetests/vcode/aarch64/traps.clif index 9f4a40ef12..b4c4be344b 100644 --- a/cranelift/filetests/filetests/vcode/arm64/traps.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/traps.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f() { block0: diff --git a/cranelift/filetests/filetests/vcode/arm64/uextend-sextend.clif b/cranelift/filetests/filetests/vcode/aarch64/uextend-sextend.clif similarity index 99% rename from cranelift/filetests/filetests/vcode/arm64/uextend-sextend.clif rename to cranelift/filetests/filetests/vcode/aarch64/uextend-sextend.clif index 85a5c488a2..86084ff0cc 100644 --- a/cranelift/filetests/filetests/vcode/arm64/uextend-sextend.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/uextend-sextend.clif @@ -1,4 +1,5 @@ -test vcode arch=arm64 +test vcode +target aarch64 function %f_u_8_64(i8) -> i64 { block0(v0: i8): diff --git a/cranelift/filetests/src/test_vcode.rs b/cranelift/filetests/src/test_vcode.rs index f97aef47ea..93bce57a59 100644 --- a/cranelift/filetests/src/test_vcode.rs +++ b/cranelift/filetests/src/test_vcode.rs @@ -4,11 +4,9 @@ use cranelift_codegen::isa::lookup; use cranelift_codegen::settings; use cranelift_codegen::Context as CodegenContext; use cranelift_reader::{TestCommand, TestOption}; -use target_lexicon::Triple; use log::info; use std::borrow::Cow; -use std::str::FromStr; use std::string::String; struct TestVCode { @@ -41,15 +39,13 @@ 
impl SubTest for TestVCode { } fn needs_isa(&self) -> bool { - false + true } fn run(&self, func: Cow, context: &Context) -> SubtestResult<()> { + let triple = context.isa.unwrap().triple().clone(); let func = func.into_owned(); - let triple = - Triple::from_str(&self.arch).map_err(|_| format!("Unknown arch: '{}'", self.arch))?; - let mut isa = lookup(triple) .map_err(|_| format!("Could not look up backend for arch '{}'", self.arch))? .finish(settings::Flags::new(settings::builder())); diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs index 8ffe729526..824c35ced6 100644 --- a/crates/jit/src/link.rs +++ b/crates/jit/src/link.rs @@ -142,12 +142,17 @@ cfg_if::cfg_if! { pub fn ___chkstk(); } const PROBESTACK: unsafe extern "C" fn() = ___chkstk; + } else if #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] { + // As per + // https://github.com/rust-lang/compiler-builtins/blob/cae3e6ea23739166504f9f9fb50ec070097979d4/src/probestack.rs#L39, + // LLVM only has stack-probe support on x86-64 and x86. Thus, on any other CPU + // architecture, we simply use an empty stack-probe function. + extern "C" fn empty_probestack() {} + const PROBESTACK: unsafe extern "C" fn() = empty_probestack; } else { extern "C" { pub fn __rust_probestack(); } - static PROBESTACK: unsafe extern "C" fn() = empty_probestack; + static PROBESTACK: unsafe extern "C" fn() = __rust_probestack; } } - -extern "C" fn empty_probestack() {} diff --git a/crates/runtime/src/traphandlers.rs b/crates/runtime/src/traphandlers.rs index 571f823b3f..e180b6c91b 100644 --- a/crates/runtime/src/traphandlers.rs +++ b/crates/runtime/src/traphandlers.rs @@ -31,7 +31,6 @@ cfg_if::cfg_if! { static mut PREV_SIGBUS: MaybeUninit = MaybeUninit::uninit(); static mut PREV_SIGILL: MaybeUninit = MaybeUninit::uninit(); static mut PREV_SIGFPE: MaybeUninit = MaybeUninit::uninit(); - static mut PREV_SIGTRAP: MaybeUninit = MaybeUninit::uninit(); unsafe fn platform_init() { let register = |slot: &mut MaybeUninit, signal: i32| { @@ -71,9 +70,6 @@ cfg_if::cfg_if! { register(&mut PREV_SIGFPE, libc::SIGFPE); } - // on ARM64, we use `brk` to report traps, which generates SIGTRAP. - register(&mut PREV_SIGTRAP, libc::SIGTRAP); - // On ARM, handle Unaligned Accesses. // On Darwin, guard page accesses are raised as SIGBUS. if cfg!(target_arch = "arm") || cfg!(target_os = "macos") { @@ -91,7 +87,6 @@ cfg_if::cfg_if! { libc::SIGBUS => &PREV_SIGBUS, libc::SIGFPE => &PREV_SIGFPE, libc::SIGILL => &PREV_SIGILL, - libc::SIGTRAP => &PREV_SIGTRAP, _ => panic!("unknown signal: {}", signum), }; let handled = tls::with(|info| { diff --git a/tests/custom_signal_handler.rs b/tests/custom_signal_handler.rs index 8b3c8cd478..27d14fc910 100644 --- a/tests/custom_signal_handler.rs +++ b/tests/custom_signal_handler.rs @@ -122,7 +122,7 @@ mod tests { .downcast::()?; assert!( trap.message() - .starts_with("wasm trap: out of bounds"), + .starts_with("wasm trap: out of bounds memory access"), "bad trap message: {:?}", trap.message() ); @@ -149,7 +149,7 @@ mod tests { .downcast::()?; assert!(trap .message() - .starts_with("wasm trap: out of bounds")); + .starts_with("wasm trap: out of bounds memory access")); } Ok(()) }
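Finally, the filetest side: `TestVCode::needs_isa()` now returns true and the triple comes from the test file itself via `context.isa`, so every `.clif` file under `filetests/vcode/aarch64` drops the old `arch=` option in favor of a `target` directive. The new header convention, for illustration:

```clif
test vcode
target aarch64

; function bodies are unchanged; only the header lines above are rewritten.
```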