Address review comments:

- Undo temporary changes to default features (`all-arch`) and a
  signal-handler test.
- Remove `SIGTRAP` handler: no longer needed now that we've found an
  "undefined opcode" option on ARM64.
- Rename pp.rs to pretty_print.rs in machinst/.
- Use an empty stack probe only on non-x86 targets. As per a comment in
  rust-lang/compiler-builtins [1], LLVM supports stack probes only on
  x86 and x86-64, so on any other CPU architecture we cannot refer to
  `__rust_probestack`: the symbol simply does not exist there. (A hedged
  sketch of this check follows the footnote below.)
- Rename arm64 to aarch64.
- Use `target` directive in vcode filetests.
- Run the flags verifier, but without encinfo, when using new backends.
- Clean up warning overrides.
- Fix up use of casts: use `u32::from(x)` and siblings when possible, and
  `u32::try_from(x).unwrap()` when not, to avoid silent truncation. (A short
  cast example follows this list.)
- Take immutable `Function` borrows as input; we don't actually
  mutate the input IR.
- Lots of other miscellaneous cleanups.

[1] cae3e6ea23/src/probestack.rs (L39)
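
Sketch of the stack-probe decision above (not part of the diff; the helper
name and call site are made up for illustration, and the real change lives in
the backend glue, not shown here):

    /// Hypothetical helper: which probe-stack symbol to reference, if any.
    /// LLVM (and hence compiler-builtins) only provides `__rust_probestack`
    /// on x86 and x86-64, so every other architecture gets an empty probe.
    fn probestack_symbol(arch: &str) -> Option<&'static str> {
        match arch {
            "x86" | "x86_64" => Some("__rust_probestack"),
            _ => None, // emit an empty (no-op) stack probe instead
        }
    }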
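
Short example of the cast convention above (illustrative only, with made-up
names and values):

    use core::convert::TryFrom;

    fn example(slot_size: u8, spill_bytes: u64) -> u32 {
        // Widening is infallible, so `from` can never truncate.
        let widened = u32::from(slot_size);
        // Narrowing must be checked: `try_from(..).unwrap()` fails loudly at
        // the conversion site instead of silently truncating like `as u32`.
        let narrowed = u32::try_from(spill_bytes).unwrap();
        widened + narrowed
    }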
Author: Chris Fallin
Date: 2020-04-15 16:31:44 -07:00
Parent: 3de504c24c
Commit: 48cf2c2f50
49 changed files with 1550 additions and 1544 deletions

View File

@@ -34,7 +34,7 @@ regalloc = "0.0.17"
cranelift-codegen-meta = { path = "meta", version = "0.62.0" }
[features]
default = ["std", "unwind", "all-arch"]
default = ["std", "unwind"]
# The "std" feature enables use of libstd. The "core" feature enables use
# of some minimal std-like replacement libraries. At least one of these two

View File

@@ -180,8 +180,7 @@ impl Context {
}
if let Some(backend) = isa.get_mach_backend() {
let func = std::mem::replace(&mut self.func, Function::new());
let result = backend.compile_function(func, self.want_disasm)?;
let result = backend.compile_function(&mut self.func, self.want_disasm)?;
let info = result.code_info();
self.mach_compile_result = Some(result);
Ok(info)
@@ -312,15 +311,15 @@ impl Context {
/// Run the legalizer for `isa` on the function.
pub fn legalize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
if isa.get_mach_backend().is_some() {
// Run some specific legalizations only.
simple_legalize(&mut self.func, &mut self.cfg, isa);
Ok(())
} else {
// Legalization invalidates the domtree and loop_analysis by mutating the CFG.
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
self.domtree.clear();
self.loop_analysis.clear();
if isa.get_mach_backend().is_some() {
// Run some specific legalizations only.
simple_legalize(&mut self.func, &mut self.cfg, isa);
self.verify_if(isa)
} else {
legalize_function(&mut self.func, &mut self.cfg, isa);
debug!("Legalized:\n{}", self.func.display(isa));
self.verify_if(isa)

View File

@@ -6,48 +6,10 @@
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::EntityRef;
use crate::ir::instructions::InstructionData;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};
use crate::inst_predicates::{any_inst_results_used, has_side_effect};
use crate::ir::Function;
use crate::timing;
/// Test whether the given opcode is unsafe to even consider for DCE.
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
|| opcode.is_return()
|| opcode.can_trap()
|| opcode.other_side_effects()
|| opcode.can_store()
}
/// Preserve instructions with used result values.
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
dfg.inst_results(inst).iter().any(|v| live[v.index()])
}
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't DCE them even if the
/// loaded value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
if !opcode.can_load() {
return false;
}
match *data {
InstructionData::StackLoad { .. } => false,
InstructionData::Load { flags, .. } => !flags.notrap(),
_ => true,
}
}
/// Does the given instruction have any side-effect that would preclude it from being removed when
/// its value is unused?
pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
let data = &func.dfg[inst];
let opcode = data.opcode();
trivially_unsafe_for_dce(opcode) || is_load_with_defined_trapping(opcode, data)
}
/// Perform DCE on `func`.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
let _tt = timing::dce();

View File

@@ -0,0 +1,42 @@
//! Instruction predicates/properties, shared by various analyses.
use crate::ir::{DataFlowGraph, Function, Inst, InstructionData, Opcode};
use cranelift_entity::EntityRef;
/// Preserve instructions with used result values.
pub fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
dfg.inst_results(inst).iter().any(|v| live[v.index()])
}
/// Test whether the given opcode is unsafe to even consider as side-effect-free.
fn trivially_has_side_effects(opcode: Opcode) -> bool {
opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
|| opcode.is_return()
|| opcode.can_trap()
|| opcode.other_side_effects()
|| opcode.can_store()
}
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't treat them as side-effect-free even if the loaded
/// value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
if !opcode.can_load() {
return false;
}
match *data {
InstructionData::StackLoad { .. } => false,
InstructionData::Load { flags, .. } => !flags.notrap(),
_ => true,
}
}
/// Does the given instruction have any side-effect that would preclude it from being removed when
/// its value is unused?
pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
let data = &func.dfg[inst];
let opcode = data.opcode();
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
}

View File

@@ -3,8 +3,6 @@
//! The `Function` struct defined in this module owns all of its basic blocks and
//! instructions.
#![allow(unused_imports)]
use crate::binemit::CodeOffset;
use crate::entity::{PrimaryMap, SecondaryMap};
use crate::ir;
@@ -19,7 +17,6 @@ use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
use crate::regalloc::{EntryRegDiversions, RegDiversions};
use crate::value_label::ValueLabelsRanges;
use crate::write::write_function;
use alloc::boxed::Box;
use core::fmt;
/// A function.

View File

@@ -1,11 +1,11 @@
//! Implementation of the standard ARM64 ABI.
//! Implementation of the standard AArch64 ABI.
use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::StackSlot;
use crate::isa;
use crate::isa::arm64::inst::*;
use crate::isa::aarch64::inst::*;
use crate::machinst::*;
use crate::settings;
@@ -15,19 +15,16 @@ use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use log::debug;
// A location for an argument or return value.
#[derive(Clone, Debug)]
/// A location for an argument or return value.
#[derive(Clone, Copy, Debug)]
enum ABIArg {
// In a real register.
/// In a real register.
Reg(RealReg, ir::Type),
// Arguments only: on stack, at given offset from SP at entry.
/// Arguments only: on stack, at given offset from SP at entry.
Stack(i64, ir::Type),
// (first and only) return value only: in memory pointed to by x8 on entry.
#[allow(dead_code)]
RetMem(ir::Type),
}
/// ARM64 ABI information shared between body (callee) and caller.
/// AArch64 ABI information shared between body (callee) and caller.
struct ABISig {
args: Vec<ABIArg>,
rets: Vec<ABIArg>,
@@ -161,11 +158,6 @@ impl ABISig {
let (args, stack_arg_space) = compute_arg_locs(sig.call_conv, &sig.params);
let (rets, _) = compute_arg_locs(sig.call_conv, &sig.returns);
// Verify that there are no arguments in return-memory area.
assert!(args.iter().all(|a| match a {
&ABIArg::RetMem(..) => false,
_ => true,
}));
// Verify that there are no return values on the stack.
assert!(rets.iter().all(|a| match a {
&ABIArg::Stack(..) => false,
@@ -181,14 +173,21 @@ impl ABISig {
}
}
/// ARM64 ABI object for a function body.
pub struct ARM64ABIBody {
sig: ABISig, // signature: arg and retval regs
stackslots: Vec<usize>, // offsets to each stackslot
stackslots_size: usize, // total stack size of all stackslots
clobbered: Set<Writable<RealReg>>, // clobbered registers, from regalloc.
spillslots: Option<usize>, // total number of spillslots, from regalloc.
frame_size: Option<usize>,
/// AArch64 ABI object for a function body.
pub struct AArch64ABIBody {
/// signature: arg and retval regs
sig: ABISig,
/// offsets to each stackslot
stackslots: Vec<u32>,
/// total stack size of all stackslots
stackslots_size: u32,
/// clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
/// total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Total frame size.
frame_size: Option<u32>,
/// Calling convention this function expects.
call_conv: isa::CallConv,
}
@@ -207,20 +206,31 @@ fn in_vec_reg(ty: ir::Type) -> bool {
}
}
impl ARM64ABIBody {
impl AArch64ABIBody {
/// Create a new body ABI instance.
pub fn new(f: &ir::Function) -> Self {
debug!("ARM64 ABI: func signature {:?}", f.signature);
debug!("AArch64 ABI: func signature {:?}", f.signature);
let sig = ABISig::from_func_sig(&f.signature);
let call_conv = f.signature.call_conv;
// Only these calling conventions are supported.
assert!(
call_conv == isa::CallConv::SystemV
|| call_conv == isa::CallConv::Fast
|| call_conv == isa::CallConv::Cold
|| call_conv.extends_baldrdash(),
"Unsupported calling convention: {:?}",
call_conv
);
// Compute stackslot locations and total stackslot size.
let mut stack_offset: usize = 0;
let mut stack_offset: u32 = 0;
let mut stackslots = vec![];
for (stackslot, data) in f.stack_slots.iter() {
let off = stack_offset;
stack_offset += data.size as usize;
stack_offset = (stack_offset + 7) & !7usize;
stack_offset += data.size;
stack_offset = (stack_offset + 7) & !7;
assert_eq!(stackslot.as_u32() as usize, stackslots.len());
stackslots.push(off);
}
@@ -232,7 +242,7 @@ impl ARM64ABIBody {
clobbered: Set::empty(),
spillslots: None,
frame_size: None,
call_conv: f.signature.call_conv,
call_conv,
}
}
}
@@ -264,7 +274,7 @@ fn load_stack(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst {
mem,
srcloc: None,
},
_ => unimplemented!(),
_ => unimplemented!("load_stack({})", ty),
}
}
@@ -295,7 +305,7 @@ fn store_stack(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
mem,
srcloc: None,
},
_ => unimplemented!(),
_ => unimplemented!("store_stack({})", ty),
}
}
@@ -402,11 +412,13 @@ fn get_caller_saves_set(call_conv: isa::CallConv) -> Set<Writable<Reg>> {
set
}
impl ABIBody<Inst> for ARM64ABIBody {
impl ABIBody for AArch64ABIBody {
type I = Inst;
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for arg in &self.sig.args {
if let &ABIArg::Reg(r, _) = arg {
for &arg in &self.sig.args {
if let ABIArg::Reg(r, _) = arg {
set.insert(r);
}
}
@@ -415,8 +427,8 @@ impl ABIBody<Inst> for ARM64ABIBody {
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for ret in &self.sig.rets {
if let &ABIArg::Reg(r, _) = ret {
for &ret in &self.sig.rets {
if let ABIArg::Reg(r, _) = ret {
set.insert(r);
}
}
@@ -439,7 +451,6 @@ impl ABIBody<Inst> for ARM64ABIBody {
match &self.sig.args[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty),
&ABIArg::Stack(off, ty) => load_stack(off + 16, into_reg, ty),
_ => unimplemented!(),
}
}
@@ -447,7 +458,6 @@ impl ABIBody<Inst> for ARM64ABIBody {
match &self.sig.rets[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(Writable::from_reg(r.to_reg()), from_reg, ty),
&ABIArg::Stack(off, ty) => store_stack(off + 16, from_reg, ty),
_ => unimplemented!(),
}
}
@@ -470,7 +480,7 @@ impl ABIBody<Inst> for ARM64ABIBody {
fn load_stackslot(
&self,
slot: StackSlot,
offset: usize,
offset: u32,
ty: Type,
into_reg: Writable<Reg>,
) -> Inst {
@@ -480,7 +490,7 @@ impl ABIBody<Inst> for ARM64ABIBody {
load_stack(fp_off, into_reg, ty)
}
fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> Inst {
fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
@@ -532,13 +542,13 @@ impl ABIBody<Inst> for ARM64ABIBody {
});
}
let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap();
let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap() as u32;
if self.call_conv.extends_baldrdash() {
debug_assert!(
!flags.enable_probestack(),
"baldrdash does not expect cranelift to emit stack probes"
);
total_stacksize += flags.baldrdash_prologue_words() as usize * 8;
total_stacksize += flags.baldrdash_prologue_words() as u32 * 8;
}
let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.
@@ -692,7 +702,7 @@ impl ABIBody<Inst> for ARM64ABIBody {
fn frame_size(&self) -> u32 {
self.frame_size
.expect("frame size not computed before prologue generation") as u32
.expect("frame size not computed before prologue generation")
}
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
@@ -719,8 +729,8 @@ enum CallDest {
Reg(Reg),
}
/// ARM64 ABI object for a function call.
pub struct ARM64ABICall {
/// AArch64 ABI object for a function call.
pub struct AArch64ABICall {
sig: ABISig,
uses: Set<Reg>,
defs: Set<Writable<Reg>>,
@@ -751,16 +761,16 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Set<Reg>, Set<Writable<Reg>>) {
(uses, defs)
}
impl ARM64ABICall {
impl AArch64ABICall {
/// Create a callsite ABI object for a call directly to the specified function.
pub fn from_func(
sig: &ir::Signature,
extname: &ir::ExternalName,
loc: ir::SourceLoc,
) -> ARM64ABICall {
) -> AArch64ABICall {
let sig = ABISig::from_func_sig(sig);
let (uses, defs) = abisig_to_uses_and_defs(&sig);
ARM64ABICall {
AArch64ABICall {
sig,
uses,
defs,
@@ -777,10 +787,10 @@ impl ARM64ABICall {
ptr: Reg,
loc: ir::SourceLoc,
opcode: ir::Opcode,
) -> ARM64ABICall {
) -> AArch64ABICall {
let sig = ABISig::from_func_sig(sig);
let (uses, defs) = abisig_to_uses_and_defs(&sig);
ARM64ABICall {
AArch64ABICall {
sig,
uses,
defs,
@@ -820,7 +830,9 @@ fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> {
}
}
impl ABICall<Inst> for ARM64ABICall {
impl ABICall for AArch64ABICall {
type I = Inst;
fn num_args(&self) -> usize {
self.sig.args.len()
}
@@ -841,14 +853,12 @@ impl ABICall<Inst> for ARM64ABICall {
mem: MemArg::SPOffset(off),
srcloc: None,
},
_ => unimplemented!(),
}
}
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
match &self.sig.rets[idx] {
&ABIArg::Reg(reg, ty) => Inst::gen_move(into_reg, reg.to_reg(), ty),
&ABIArg::RetMem(..) => panic!("Return-memory area not yet supported"),
_ => unimplemented!(),
}
}

View File

@@ -1,48 +1,33 @@
//! ARM64 ISA definitions: instruction arguments.
//! AArch64 ISA definitions: instruction arguments.
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
#![allow(non_snake_case)]
use crate::binemit::{CodeOffset, CodeSink};
use crate::ir::constant::{ConstantData, ConstantOffset};
use crate::binemit::CodeOffset;
use crate::ir::Type;
use crate::isa::arm64::inst::*;
use crate::machinst::*;
use crate::isa::aarch64::inst::*;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable,
NUM_REG_CLASSES,
};
use regalloc::{RealRegUniverse, Reg, Writable};
use std::string::{String, ToString};
use core::convert::{Into, TryFrom};
use std::string::String;
/// A shift operator for a register or immediate.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ShiftOp {
ASR,
LSR,
LSL,
ROR,
LSL = 0b00,
LSR = 0b01,
ASR = 0b10,
ROR = 0b11,
}
impl ShiftOp {
/// Get the encoding of this shift op.
pub fn bits(&self) -> u8 {
match self {
&ShiftOp::LSL => 0b00,
&ShiftOp::LSR => 0b01,
&ShiftOp::ASR => 0b10,
&ShiftOp::ROR => 0b11,
pub fn bits(self) -> u8 {
self as u8
}
}
}
/// A shift operator with an amount, guaranteed to be within range.
#[derive(Clone, Debug)]
pub struct ShiftOpAndAmt {
op: ShiftOp,
shift: ShiftOpShiftImm,
}
/// A shift operator amount.
#[derive(Clone, Copy, Debug)]
@@ -62,11 +47,18 @@ impl ShiftOpShiftImm {
}
/// Return the shift amount.
pub fn value(&self) -> u8 {
pub fn value(self) -> u8 {
self.0
}
}
/// A shift operator with an amount, guaranteed to be within range.
#[derive(Clone, Debug)]
pub struct ShiftOpAndAmt {
op: ShiftOp,
shift: ShiftOpShiftImm,
}
impl ShiftOpAndAmt {
pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
ShiftOpAndAmt { op, shift }
@@ -74,7 +66,7 @@ impl ShiftOpAndAmt {
/// Get the shift op.
pub fn op(&self) -> ShiftOp {
self.op.clone()
self.op
}
/// Get the shift amount.
@@ -85,30 +77,22 @@ impl ShiftOpAndAmt {
/// An extend operator for a register.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ExtendOp {
SXTB,
SXTH,
SXTW,
SXTX,
UXTB,
UXTH,
UXTW,
UXTX,
UXTB = 0b000,
UXTH = 0b001,
UXTW = 0b010,
UXTX = 0b011,
SXTB = 0b100,
SXTH = 0b101,
SXTW = 0b110,
SXTX = 0b111,
}
impl ExtendOp {
/// Encoding of this op.
pub fn bits(&self) -> u8 {
match self {
&ExtendOp::UXTB => 0b000,
&ExtendOp::UXTH => 0b001,
&ExtendOp::UXTW => 0b010,
&ExtendOp::UXTX => 0b011,
&ExtendOp::SXTB => 0b100,
&ExtendOp::SXTH => 0b101,
&ExtendOp::SXTW => 0b110,
&ExtendOp::SXTX => 0b111,
}
pub fn bits(self) -> u8 {
self as u8
}
}
@@ -128,18 +112,34 @@ pub enum MemLabel {
#[derive(Clone, Debug)]
pub enum MemArg {
Label(MemLabel),
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
PostIndexed(Writable<Reg>, SImm9),
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
PreIndexed(Writable<Reg>, SImm9),
// N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
// what the ISA calls the "register offset" addressing mode. We split out
// several options here for more ergonomic codegen.
/// Register plus register offset.
RegReg(Reg, Reg),
/// Register plus register offset, scaled by type's size.
RegScaled(Reg, Reg, Type),
/// Register plus register offset, scaled by type's size, with index sign- or zero-extended
/// first.
RegScaledExtended(Reg, Reg, Type, ExtendOp),
/// Unscaled signed 9-bit immediate offset from reg.
Unscaled(Reg, SImm9),
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
UnsignedOffset(Reg, UImm12Scaled),
/// Offset from the stack pointer or frame pointer.
/// Offset from the stack pointer. Lowered into a real amode at emission.
SPOffset(i64),
/// Offset from the frame pointer. Lowered into a real amode at emission.
FPOffset(i64),
}
@@ -153,9 +153,7 @@ impl MemArg {
/// Memory reference using an address in a register and an offset, if possible.
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
if offset == 0 {
Some(MemArg::Unscaled(reg, SImm9::zero()))
} else if let Some(simm9) = SImm9::maybe_from_i64(offset) {
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
Some(MemArg::Unscaled(reg, simm9))
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
Some(MemArg::UnsignedOffset(reg, uimm12s))
@@ -165,17 +163,18 @@ impl MemArg {
}
/// Memory reference using the sum of two registers as an address.
pub fn reg_reg(reg1: Reg, reg2: Reg) -> MemArg {
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
MemArg::RegReg(reg1, reg2)
}
/// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
pub fn reg_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg {
pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg {
MemArg::RegScaled(reg1, reg2, ty)
}
/// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
pub fn reg_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg {
/// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or
/// zero-extended as per `op`.
pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg {
MemArg::RegScaledExtended(reg1, reg2, ty, op)
}
@@ -199,23 +198,24 @@ pub enum PairMemArg {
/// Condition for conditional branches.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Cond {
Eq,
Ne,
Hs,
Lo,
Mi,
Pl,
Vs,
Vc,
Hi,
Ls,
Ge,
Lt,
Gt,
Le,
Al,
Nv,
Eq = 0,
Ne = 1,
Hs = 2,
Lo = 3,
Mi = 4,
Pl = 5,
Vs = 6,
Vc = 7,
Hi = 8,
Ls = 9,
Ge = 10,
Lt = 11,
Gt = 12,
Le = 13,
Al = 14,
Nv = 15,
}
impl Cond {
@@ -224,18 +224,25 @@ impl Cond {
match self {
Cond::Eq => Cond::Ne,
Cond::Ne => Cond::Eq,
Cond::Hs => Cond::Lo,
Cond::Lo => Cond::Hs,
Cond::Mi => Cond::Pl,
Cond::Pl => Cond::Mi,
Cond::Vs => Cond::Vc,
Cond::Vc => Cond::Vs,
Cond::Hi => Cond::Ls,
Cond::Ls => Cond::Hi,
Cond::Ge => Cond::Lt,
Cond::Lt => Cond::Ge,
Cond::Gt => Cond::Le,
Cond::Le => Cond::Gt,
Cond::Al => Cond::Nv,
Cond::Nv => Cond::Al,
}
@@ -243,24 +250,7 @@ impl Cond {
/// Return the machine encoding of this condition.
pub fn bits(self) -> u32 {
match self {
Cond::Eq => 0,
Cond::Ne => 1,
Cond::Hs => 2,
Cond::Lo => 3,
Cond::Mi => 4,
Cond::Pl => 5,
Cond::Vs => 6,
Cond::Vc => 7,
Cond::Hi => 8,
Cond::Ls => 9,
Cond::Ge => 10,
Cond::Lt => 11,
Cond::Gt => 12,
Cond::Le => 13,
Cond::Al => 14,
Cond::Nv => 15,
}
self as u32
}
}
@@ -305,7 +295,7 @@ impl BranchTarget {
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
match self {
&mut BranchTarget::Block(bix) => {
let bix = bix as usize;
let bix = usize::try_from(bix).unwrap();
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
@@ -343,7 +333,7 @@ impl BranchTarget {
}
}
/// Get the offset as a 16-bit offset, or `None` if overflow.
/// Get the offset as a 19-bit offset, or `None` if overflow.
pub fn as_off19(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 18)) && (off >= -(1 << 18)) {
@@ -357,7 +347,7 @@ impl BranchTarget {
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[*bix as usize];
let n = block_index_map[usize::try_from(*bix).unwrap()];
*bix = n;
}
&mut BranchTarget::ResolvedOffset(_) => {}
@@ -392,7 +382,7 @@ fn shift_for_type(ty: Type) -> usize {
4 => 2,
8 => 3,
16 => 4,
_ => panic!("unknown type"),
_ => panic!("unknown type: {}", ty),
}
}
@@ -427,15 +417,15 @@ impl ShowWithRRU for MemArg {
}
&MemArg::RegScaledExtended(r1, r2, ty, op) => {
let shift = shift_for_type(ty);
let is32 = match op {
ExtendOp::SXTW | ExtendOp::UXTW => true,
_ => false,
let size = match op {
ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
_ => InstSize::Size64,
};
let op = op.show_rru(mb_rru);
format!(
"[{}, {}, {} #{}]",
r1.show_rru(mb_rru),
show_ireg_sized(r2, mb_rru, is32),
show_ireg_sized(r2, mb_rru, size),
op,
shift
)
@@ -499,3 +489,40 @@ impl ShowWithRRU for BranchTarget {
}
}
}
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
/// 64-bit variants of many instructions (and integer registers).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
Size32,
Size64,
}
impl InstSize {
/// 32-bit case?
pub fn is32(self) -> bool {
self == InstSize::Size32
}
/// 64-bit case?
pub fn is64(self) -> bool {
self == InstSize::Size64
}
/// Convert from an `is32` boolean flag to an `InstSize`.
pub fn from_is32(is32: bool) -> InstSize {
if is32 {
InstSize::Size32
} else {
InstSize::Size64
}
}
/// Convert from a needed width to the smallest size that fits.
pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
let bits: usize = bits.into();
assert!(bits <= 64);
if bits <= 32 {
InstSize::Size32
} else {
InstSize::Size64
}
}
}
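
A small usage sketch for the `InstSize` helper introduced in this hunk (not
part of the diff; it assumes the enum above is in scope, and the function
names are made up):

    // Assumes the `InstSize` enum from the hunk above is in scope.
    fn operand_size_for(ty_bits: usize) -> InstSize {
        // Types of 32 bits or fewer use the 32-bit instruction form;
        // wider values (up to 64 bits) use the 64-bit form.
        InstSize::from_bits(ty_bits)
    }

    fn demo() {
        assert!(operand_size_for(8).is32());
        assert!(operand_size_for(32).is32());
        assert!(operand_size_for(64).is64());
    }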

View File

@@ -1,22 +1,14 @@
//! ARM64 ISA: binary code emission.
//! AArch64 ISA: binary code emission.
#![allow(dead_code)]
#![allow(non_snake_case)]
use crate::binemit::{CodeOffset, CodeSink, Reloc};
use crate::binemit::{CodeOffset, Reloc};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::{Opcode, TrapCode, Type};
use crate::isa::arm64::inst::*;
use crate::machinst::*;
use cranelift_entity::EntityRef;
use crate::ir::TrapCode;
use crate::isa::aarch64::inst::*;
use std::env;
use core::convert::TryFrom;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable,
NUM_REG_CLASSES,
};
use regalloc::{Reg, RegClass, Writable};
use alloc::vec::Vec;
@@ -66,16 +58,7 @@ pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
/// Helper: get a ConstantData from a u64.
pub fn u64_constant(bits: u64) -> ConstantData {
let data = [
(bits & 0xff) as u8,
((bits >> 8) & 0xff) as u8,
((bits >> 16) & 0xff) as u8,
((bits >> 24) & 0xff) as u8,
((bits >> 32) & 0xff) as u8,
((bits >> 40) & 0xff) as u8,
((bits >> 48) & 0xff) as u8,
((bits >> 56) & 0xff) as u8,
];
let data = bits.to_le_bytes();
ConstantData::from(&data[..])
}
@@ -84,41 +67,42 @@ pub fn u64_constant(bits: u64) -> ConstantData {
fn machreg_to_gpr(m: Reg) -> u32 {
assert!(m.get_class() == RegClass::I64);
assert!(m.is_real());
m.to_real_reg().get_hw_encoding() as u32
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}
fn machreg_to_vec(m: Reg) -> u32 {
assert!(m.get_class() == RegClass::V128);
assert!(m.is_real());
m.to_real_reg().get_hw_encoding() as u32
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}
fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
m.to_real_reg().get_hw_encoding() as u32
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}
fn enc_arith_rrr(bits_31_21: u16, bits_15_10: u8, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
((bits_31_21 as u32) << 21)
| ((bits_15_10 as u32) << 10)
fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
(bits_31_21 << 21)
| (bits_15_10 << 10)
| machreg_to_gpr(rd.to_reg())
| (machreg_to_gpr(rn) << 5)
| (machreg_to_gpr(rm) << 16)
}
fn enc_arith_rr_imm12(bits_31_24: u8, immshift: u8, imm12: u16, rn: Reg, rd: Writable<Reg>) -> u32 {
((bits_31_24 as u32) << 24)
| ((immshift as u32) << 22)
| ((imm12 as u32) << 10)
fn enc_arith_rr_imm12(
bits_31_24: u32,
immshift: u32,
imm12: u32,
rn: Reg,
rd: Writable<Reg>,
) -> u32 {
(bits_31_24 << 24)
| (immshift << 22)
| (imm12 << 10)
| (machreg_to_gpr(rn) << 5)
| machreg_to_gpr(rd.to_reg())
}
fn enc_arith_rr_imml(bits_31_23: u16, imm_bits: u16, rn: Reg, rd: Writable<Reg>) -> u32 {
((bits_31_23 as u32) << 23)
| ((imm_bits as u32) << 10)
| (machreg_to_gpr(rn) << 5)
| machreg_to_gpr(rd.to_reg())
fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
(bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}
fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
@@ -159,8 +143,8 @@ fn enc_move_wide(op: MoveWideOpcode, rd: Writable<Reg>, imm: MoveWideConst) -> u
assert!(imm.shift <= 0b11);
MOVE_WIDE_FIXED
| (op as u32) << 29
| (imm.shift as u32) << 21
| (imm.bits as u32) << 5
| u32::from(imm.shift) << 21
| u32::from(imm.bits) << 5
| machreg_to_gpr(rd.to_reg())
}
@@ -201,7 +185,7 @@ fn enc_ldst_reg(
Some(ExtendOp::UXTW) => 0b010,
Some(ExtendOp::SXTW) => 0b110,
Some(ExtendOp::SXTX) => 0b111,
None => 0b011, /* LSL */
None => 0b011, // LSL
_ => panic!("bad extend mode for ld/st MemArg"),
};
(op_31_22 << 22)
@@ -244,7 +228,7 @@ fn enc_br(rn: Reg) -> u32 {
}
fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
let off = off as u32;
let off = u32::try_from(off).unwrap();
let immlo = off & 3;
let immhi = (off >> 2) & ((1 << 19) - 1);
(0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
@@ -258,8 +242,8 @@ fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
| (cond.bits() << 12)
}
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, is32: bool) -> u32 {
let ty_bit = if is32 { 0 } else { 1 };
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: InstSize) -> u32 {
let ty_bit = if size.is32() { 0 } else { 1 };
0b000_11110_00_1_00000_0000_11_00000_00000
| (machreg_to_vec(rm) << 16)
| (machreg_to_vec(rn) << 5)
@@ -301,8 +285,8 @@ fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32
| machreg_to_vec(rd.to_reg())
}
fn enc_fcmp(is32: bool, rn: Reg, rm: Reg) -> u32 {
let bits = if is32 {
fn enc_fcmp(size: InstSize, rn: Reg, rm: Reg) -> u32 {
let bits = if size.is32() {
0b000_11110_00_1_00000_00_1000_00000_00000
} else {
0b000_11110_01_1_00000_00_1000_00000_00000
@@ -359,7 +343,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
| ALUOp::SMulH
| ALUOp::UMulH => {
//// RRRR ops.
panic!("Bad ALUOp in RRR form!");
panic!("Bad ALUOp {:?} in RRR form!", alu_op);
}
};
let bit15_10 = match alu_op {
@@ -450,14 +434,14 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
} => {
let amt = immshift.value();
let (top10, immr, imms) = match alu_op {
ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), amt as u32),
ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), amt as u32),
ALUOp::Lsr32 => (0b0101001100, amt as u32, 0b011111),
ALUOp::Lsr64 => (0b1101001101, amt as u32, 0b111111),
ALUOp::Asr32 => (0b0001001100, amt as u32, 0b011111),
ALUOp::Asr64 => (0b1001001101, amt as u32, 0b111111),
ALUOp::Lsl32 => (0b0101001100, (32 - amt) as u32, (31 - amt) as u32),
ALUOp::Lsl64 => (0b1101001101, (64 - amt) as u32, (63 - amt) as u32),
ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
ALUOp::Lsl32 => (0b0101001100, u32::from(32 - amt), u32::from(31 - amt)),
ALUOp::Lsl64 => (0b1101001101, u32::from(64 - amt), u32::from(63 - amt)),
_ => unimplemented!("{:?}", alu_op),
};
sink.put4(
@@ -476,7 +460,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rm,
ref shiftop,
} => {
let top11: u16 = match alu_op {
let top11: u32 = match alu_op {
ALUOp::Add32 => 0b000_01011000,
ALUOp::Add64 => 0b100_01011000,
ALUOp::AddS32 => 0b001_01011000,
@@ -499,8 +483,8 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ALUOp::AndNot64 => 0b100_01010001,
_ => unimplemented!("{:?}", alu_op),
};
let top11 = top11 | ((shiftop.op().bits() as u16) << 1);
let bits_15_10 = shiftop.amt().value();
let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
let bits_15_10 = u32::from(shiftop.amt().value());
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
}
@@ -511,7 +495,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rm,
extendop,
} => {
let top11 = match alu_op {
let top11: u32 = match alu_op {
ALUOp::Add32 => 0b00001011001,
ALUOp::Add64 => 0b10001011001,
ALUOp::Sub32 => 0b01001011001,
@@ -522,12 +506,12 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ALUOp::SubS64 => 0b11101011001,
_ => unimplemented!("{:?}", alu_op),
};
let bits_15_10 = extendop.bits() << 3;
let bits_15_10 = u32::from(extendop.bits()) << 3;
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
}
&Inst::BitRR { op, rd, rn, .. } => {
let size = if op.is_32_bit() { 0b0 } else { 0b1 };
let size = if op.inst_size().is32() { 0b0 } else { 0b1 };
let (op1, op2) = match op {
BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
@@ -655,6 +639,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
}
&MemArg::Label(ref label) => {
let offset = match label {
// cast i32 to u32 (two's-complement)
&MemLabel::PCRel(off) => off as u32,
} / 4;
assert!(offset < (1 << 19));
@@ -825,10 +810,16 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
&Inst::Mov { rd, rm } => {
assert!(rd.to_reg().get_class() == rm.get_class());
assert!(rm.get_class() == RegClass::I64);
// MOV to SP is interpreted as MOV to XZR instead. And our codegen
// should never MOV to XZR.
assert!(machreg_to_gpr(rd.to_reg()) != 31);
// Encoded as ORR rd, rm, zero.
sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
}
&Inst::Mov32 { rd, rm } => {
// MOV to SP is interpreted as MOV to XZR instead. And our codegen
// should never MOV to XZR.
assert!(machreg_to_gpr(rd.to_reg()) != 31);
// Encoded as ORR rd, rm, zero.
sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
}
@@ -888,10 +879,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
}
&Inst::FpuCmp32 { rn, rm } => {
sink.put4(enc_fcmp(/* is32 = */ true, rn, rm));
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
}
&Inst::FpuCmp64 { rn, rm } => {
sink.put4(enc_fcmp(/* is32 = */ false, rn, rm));
sink.put4(enc_fcmp(InstSize::Size64, rn, rm));
}
&Inst::FpuToInt { op, rd, rn } => {
let top16 = match op {
@@ -962,10 +953,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put8(const_data.to_bits());
}
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ true));
sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32));
}
&Inst::FpuCSel64 { rd, rn, rm, cond } => {
sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ false));
sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size64));
}
&Inst::FpuRound { op, rd, rn } => {
let top22 = match op {
@@ -1093,10 +1084,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// do early (fake) emission for size computation.
sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap()));
}
&Inst::Ret {} => {
&Inst::Ret => {
sink.put4(0xd65f03c0);
}
&Inst::EpiloguePlaceholder {} => {
&Inst::EpiloguePlaceholder => {
// Noop; this is just a placeholder for epilogues.
}
&Inst::Call {
@@ -1168,7 +1159,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
&Inst::IndirectBr { rn, .. } => {
sink.put4(enc_br(rn));
}
&Inst::Nop => {}
&Inst::Nop0 => {}
&Inst::Nop4 => {
sink.put4(0xd503201f);
}
@@ -1204,7 +1195,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// the middle; we depend on hardcoded PC-rel addressing below.
//
// N.B.: if PC-rel addressing on ADR below is changed, also update
// `Inst::with_block_offsets()` in arm64/inst/mod.rs.
// `Inst::with_block_offsets()` in aarch64/inst/mod.rs.
// Save index in a tmp (the live range of ridx only goes to start of this
// sequence; rtmp1 or rtmp2 may overwrite it).
@@ -1219,7 +1210,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// Load value out of jump table
let inst = Inst::SLoad32 {
rd: rtmp2,
mem: MemArg::reg_reg_scaled_extended(
mem: MemArg::reg_plus_reg_scaled_extended(
rtmp1.to_reg(),
rtmp2.to_reg(),
I32,
@@ -1246,7 +1237,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// Emit jump table (table of 32-bit offsets).
for target in targets {
let off = target.as_offset_words() * 4;
let off = off as i32 as u32;
let off = i32::try_from(off).unwrap();
// cast i32 to u32 (two's-complement)
let off = off as u32;
sink.put4(off);
}
}
@@ -1292,7 +1285,7 @@ mod test {
use crate::isa::test_utils;
#[test]
fn test_arm64_binemit() {
fn test_aarch64_binemit() {
let mut insns = Vec::<(Inst, &str, &str)>::new();
// N.B.: the architecture is little-endian, so when transcribing the 32-bit
@@ -1310,10 +1303,10 @@ mod test {
//
// Then:
//
// $ echo "mov x1, x2" | arm64inst.sh
insns.push((Inst::Ret {}, "C0035FD6", "ret"));
insns.push((Inst::Nop {}, "", "nop-zero-len"));
insns.push((Inst::Nop4 {}, "1F2003D5", "nop"));
// $ echo "mov x1, x2" | aarch64inst.sh
insns.push((Inst::Ret, "C0035FD6", "ret"));
insns.push((Inst::Nop0, "", "nop-zero-len"));
insns.push((Inst::Nop4, "1F2003D5", "nop"));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::Add32,
@@ -4052,7 +4045,7 @@ mod test {
let rru = create_reg_universe();
for (insn, expected_encoding, expected_printing) in insns {
println!(
"ARM64: {:?}, {}, {}",
"AArch64: {:?}, {}, {}",
insn, expected_encoding, expected_printing
);

View File

@@ -1,8 +1,7 @@
//! ARM64 ISA definitions: immediate constants.
#![allow(dead_code)]
#![allow(non_snake_case)]
//! AArch64 ISA definitions: immediate constants.
// Some variants are never constructed, but we still want them as options in the future.
#[allow(dead_code)]
use crate::ir::types::*;
use crate::ir::Type;
use crate::machinst::*;
@@ -28,12 +27,12 @@ impl SImm7Scaled {
assert!(scale_ty == I64 || scale_ty == I32);
let scale = scale_ty.bytes();
assert!(scale.is_power_of_two());
let scale = scale as i64;
let scale = i64::from(scale);
let upper_limit = 63 * scale;
let lower_limit = -(64 * scale);
if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 {
Some(SImm7Scaled {
value: value as i16,
value: i16::try_from(value).unwrap(),
scale_ty,
})
} else {
@@ -48,7 +47,12 @@ impl SImm7Scaled {
/// Bits for encoding.
pub fn bits(&self) -> u32 {
((self.value / self.scale_ty.bytes() as i16) as u32) & 0x7f
let ty_bytes: i16 = self.scale_ty.bytes() as i16;
let scaled: i16 = self.value / ty_bytes;
assert!(scaled <= 63 && scaled >= -64);
let scaled: i8 = scaled as i8;
let encoded: u32 = scaled as u32;
encoded & 0x7f
}
}
@@ -125,7 +129,7 @@ impl UImm12Scaled {
#[derive(Clone, Debug)]
pub struct Imm12 {
/// The immediate bits.
pub bits: usize,
pub bits: u16,
/// Whether the immediate bits are shifted left by 12 or not.
pub shift12: bool,
}
@@ -140,12 +144,12 @@ impl Imm12 {
})
} else if val < 0xfff {
Some(Imm12 {
bits: val as usize,
bits: val as u16,
shift12: false,
})
} else if val < 0xfff_000 && (val & 0xfff == 0) {
Some(Imm12 {
bits: (val as usize) >> 12,
bits: (val >> 12) as u16,
shift12: true,
})
} else {
@@ -154,7 +158,7 @@ impl Imm12 {
}
/// Bits for 2-bit "shift" field in e.g. AddI.
pub fn shift_bits(&self) -> u8 {
pub fn shift_bits(&self) -> u32 {
if self.shift12 {
0b01
} else {
@@ -163,8 +167,8 @@ impl Imm12 {
}
/// Bits for 12-bit "imm" field in e.g. AddI.
pub fn imm_bits(&self) -> u16 {
self.bits as u16
pub fn imm_bits(&self) -> u32 {
self.bits as u32
}
}
@@ -175,11 +179,11 @@ pub struct ImmLogic {
/// The actual value.
value: u64,
/// `N` flag.
pub N: bool,
pub n: bool,
/// `S` field: element size and element bits.
pub R: u8,
pub r: u8,
/// `R` field: rotate amount.
pub S: u8,
pub s: u8,
}
impl ImmLogic {
@@ -367,24 +371,19 @@ impl ImmLogic {
debug_assert!(u8::try_from(s).is_ok());
Some(ImmLogic {
value: original_value,
N: out_n != 0,
R: r as u8,
S: s as u8,
n: out_n != 0,
r: r as u8,
s: s as u8,
})
}
pub fn from_raw(value: u64, n: bool, r: u8, s: u8) -> ImmLogic {
ImmLogic {
N: n,
R: r,
S: s,
value,
}
ImmLogic { n, r, s, value }
}
/// Returns bits ready for encoding: (N:1, R:6, S:6)
pub fn enc_bits(&self) -> u16 {
((self.N as u16) << 12) | ((self.R as u16) << 6) | (self.S as u16)
pub fn enc_bits(&self) -> u32 {
((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32)
}
/// Returns the value that this immediate represents.
@@ -427,7 +426,7 @@ impl ImmShift {
pub struct MoveWideConst {
/// The value.
pub bits: u16,
/// shifted 16*shift bits to the left.
/// Result is `bits` shifted 16*shift bits to the left.
pub shift: u8,
}
@@ -487,7 +486,7 @@ impl MoveWideConst {
impl ShowWithRRU for Imm12 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let shift = if self.shift12 { 12 } else { 0 };
let value = self.bits << shift;
let value = u32::from(self.bits) << shift;
format!("#{}", value)
}
}
@@ -544,9 +543,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 1,
N: true,
R: 0,
S: 0
n: true,
r: 0,
s: 0
}),
ImmLogic::maybe_from_u64(1, I64)
);
@@ -554,9 +553,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 2,
N: true,
R: 63,
S: 0
n: true,
r: 63,
s: 0
}),
ImmLogic::maybe_from_u64(2, I64)
);
@@ -568,9 +567,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 248,
N: true,
R: 61,
S: 4
n: true,
r: 61,
s: 4
}),
ImmLogic::maybe_from_u64(248, I64)
);
@@ -580,9 +579,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 1920,
N: true,
R: 57,
S: 3
n: true,
r: 57,
s: 3
}),
ImmLogic::maybe_from_u64(1920, I64)
);
@@ -590,9 +589,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0x7ffe,
N: true,
R: 63,
S: 13
n: true,
r: 63,
s: 13
}),
ImmLogic::maybe_from_u64(0x7ffe, I64)
);
@@ -600,9 +599,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0x30000,
N: true,
R: 48,
S: 1
n: true,
r: 48,
s: 1
}),
ImmLogic::maybe_from_u64(0x30000, I64)
);
@@ -610,9 +609,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0x100000,
N: true,
R: 44,
S: 0
n: true,
r: 44,
s: 0
}),
ImmLogic::maybe_from_u64(0x100000, I64)
);
@@ -620,9 +619,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: u64::max_value() - 1,
N: true,
R: 63,
S: 62
n: true,
r: 63,
s: 62
}),
ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
);
@@ -630,9 +629,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0xaaaaaaaaaaaaaaaa,
N: false,
R: 1,
S: 60
n: false,
r: 1,
s: 60
}),
ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
);
@@ -640,9 +639,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0x8181818181818181,
N: false,
R: 1,
S: 49
n: false,
r: 1,
s: 49
}),
ImmLogic::maybe_from_u64(0x8181818181818181, I64)
);
@@ -650,9 +649,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0xffc3ffc3ffc3ffc3,
N: false,
R: 10,
S: 43
n: false,
r: 10,
s: 43
}),
ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
);
@@ -660,9 +659,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0x100000001,
N: false,
R: 0,
S: 0
n: false,
r: 0,
s: 0
}),
ImmLogic::maybe_from_u64(0x100000001, I64)
);
@@ -670,9 +669,9 @@ mod test {
assert_eq!(
Some(ImmLogic {
value: 0x1111111111111111,
N: false,
R: 0,
S: 56
n: false,
r: 0,
s: 56
}),
ImmLogic::maybe_from_u64(0x1111111111111111, I64)
);

View File

@@ -1,13 +1,9 @@
//! ARM64 ISA definitions: registers.
#![allow(dead_code)]
//! AArch64 ISA definitions: registers.
use crate::isa::aarch64::inst::InstSize;
use crate::machinst::*;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable,
NUM_REG_CLASSES,
};
use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
use std::string::{String, ToString};
@@ -83,7 +79,7 @@ pub fn writable_zero_reg() -> Writable<Reg> {
/// Get a reference to the stack-pointer register.
pub fn stack_reg() -> Reg {
// XSP (stack) and XZR (zero) are logically different registers which have
// the same hardware encoding, and whose meaning, in real arm64
// the same hardware encoding, and whose meaning, in real aarch64
// instructions, is context-dependent. For convenience of
// universe-construction and for correct printing, we make them be two
// different real registers.
@@ -134,7 +130,7 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}
/// Create the register universe for ARM64.
/// Create the register universe for AArch64.
pub fn create_reg_universe() -> RealRegUniverse {
let mut regs = vec![];
let mut allocable_by_class = [None; NUM_REG_CLASSES];
@@ -217,37 +213,38 @@ pub fn create_reg_universe() -> RealRegUniverse {
}
}
/// If |ireg| denotes an I64-classed reg, make a best-effort attempt to show
/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
/// its name at the 32-bit size.
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String {
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::I64 || !is32 {
if reg.get_class() != RegClass::I64 || !size.is32() {
// We can't do any better.
return s;
}
if reg.is_real() {
// Change (eg) "x42" into "w42" as appropriate
if reg.get_class() == RegClass::I64 && is32 && s.starts_with("x") {
if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
s = "w".to_string() + &s[1..];
}
} else {
// Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role
if reg.get_class() == RegClass::I64 && is32 {
s = s + &"w";
if reg.get_class() == RegClass::I64 && size.is32() {
s.push('w');
}
}
s
}
/// Show a vector register when its use as a 32-bit or 64-bit float is known.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String {
let s = reg.show_rru(mb_rru);
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::V128 {
return s;
}
let prefix = if is32 { "s" } else { "d" };
prefix.to_string() + &s[1..]
let prefix = if size.is32() { "s" } else { "d" };
s.replace_range(0..1, prefix);
s
}
/// Show a vector register used in a scalar context.
@@ -261,12 +258,12 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
if reg.is_real() {
// Change (eg) "v0" into "d0".
if reg.get_class() == RegClass::V128 && s.starts_with("v") {
s = "d".to_string() + &s[1..];
s.replace_range(0..1, "d");
}
} else {
// Add a "d" suffix to RegClass::V128 vregs.
if reg.get_class() == RegClass::V128 {
s = s + &"d";
s.push('d');
}
}
s

View File

@@ -1,4 +1,4 @@
//! Lowering rules for ARM64.
//! Lowering rules for AArch64.
//!
//! TODO: opportunities for better code generation:
//!
@@ -6,45 +6,24 @@
//! and incorporate sign/zero extension on indicies. Recognize pre/post-index
//! opportunities.
//!
//! - Logical-immediate args.
//!
//! - Floating-point immediates.
#![allow(dead_code)]
//! - Floating-point immediates (FIMM instruction).
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{Block, InstructionData, Opcode, TrapCode, Type};
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::isa::arm64::abi::*;
use crate::isa::arm64::inst::*;
use crate::isa::arm64::Arm64Backend;
use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
use regalloc::{Reg, RegClass, Writable};
use alloc::vec::Vec;
use smallvec::SmallVec;
//============================================================================
// Helpers: opcode conversions
fn op_to_aluop(op: Opcode, ty: Type) -> Option<ALUOp> {
match (op, ty) {
(Opcode::Iadd, I32) => Some(ALUOp::Add32),
(Opcode::Iadd, I64) => Some(ALUOp::Add64),
(Opcode::Isub, I32) => Some(ALUOp::Sub32),
(Opcode::Isub, I64) => Some(ALUOp::Sub64),
_ => None,
}
}
fn is_alu_op(op: Opcode, ctrl_typevar: Type) -> bool {
op_to_aluop(op, ctrl_typevar).is_some()
}
//============================================================================
// Result enum types.
//
@@ -163,7 +142,7 @@ impl InsnInputSource {
}
}
fn get_input<C: LowerCtx<Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
fn get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
assert!(num <= ctx.num_inputs(output.insn));
InsnInput {
insn: output.insn,
@@ -173,7 +152,7 @@ fn get_input<C: LowerCtx<Inst>>(ctx: &mut C, output: InsnOutput, num: usize) ->
/// Convert an instruction input to a producing instruction's output if possible (in same BB), or a
/// register otherwise.
fn input_source<C: LowerCtx<Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
fn input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) {
let out = InsnOutput {
insn: input_inst,
@@ -190,7 +169,7 @@ fn input_source<C: LowerCtx<Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSo
// Lowering: convert instruction outputs to result types.
/// Lower an instruction output to a 64-bit constant, if possible.
fn output_to_const<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
if out.output > 0 {
None
} else {
@@ -204,7 +183,7 @@ fn output_to_const<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u6
let imm: i64 = imm.into();
Some(imm as u64)
}
&InstructionData::UnaryIeee32 { opcode: _, imm } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())),
&InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()),
_ => None,
}
@@ -212,16 +191,19 @@ fn output_to_const<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u6
}
}
fn output_to_const_f32<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Option<f32> {
fn output_to_const_f32<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<f32> {
output_to_const(ctx, out).map(|value| f32::from_bits(value as u32))
}
fn output_to_const_f64<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Option<f64> {
fn output_to_const_f64<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<f64> {
output_to_const(ctx, out).map(|value| f64::from_bits(value))
}
/// Lower an instruction output to a constant register-shift amount, if possible.
fn output_to_shiftimm<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Option<ShiftOpShiftImm> {
fn output_to_shiftimm<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
) -> Option<ShiftOpShiftImm> {
output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift)
}
@@ -251,7 +233,7 @@ impl NarrowValueMode {
}
/// Lower an instruction output to a reg.
fn output_to_reg<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
ctx.output(out.insn, out.output)
}
@@ -260,7 +242,7 @@ fn output_to_reg<C: LowerCtx<Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Re
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type. If extended, the value is
/// always extended to 64 bits, for simplicity.
fn input_to_reg<C: LowerCtx<Inst>>(
fn input_to_reg<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@@ -292,9 +274,7 @@ fn input_to_reg<C: LowerCtx<Inst>>(
});
tmp.to_reg()
}
(NarrowValueMode::ZeroExtend32, n) | (NarrowValueMode::SignExtend32, n) if n == 32 => {
in_reg
}
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
let tmp = ctx.tmp(RegClass::I64, I32);
@@ -318,7 +298,7 @@ fn input_to_reg<C: LowerCtx<Inst>>(
});
tmp.to_reg()
}
(_, n) if n == 64 => in_reg,
(_, 64) => in_reg,
_ => panic!(
"Unsupported input width: input ty {} bits {} mode {:?}",
@@ -340,7 +320,7 @@ fn input_to_reg<C: LowerCtx<Inst>>(
/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
/// register will be provided the extended value.
fn input_to_rs<C: LowerCtx<Inst>>(
fn input_to_rs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@@ -374,7 +354,7 @@ fn input_to_rs<C: LowerCtx<Inst>>(
/// vreg into which the source instruction will generate its value.
///
/// See note on `input_to_rs` for a description of `narrow_mode`.
fn input_to_rse<C: LowerCtx<Inst>>(
fn input_to_rse<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@@ -448,7 +428,7 @@ fn input_to_rse<C: LowerCtx<Inst>>(
ResultRSE::from_rs(input_to_rs(ctx, input, narrow_mode))
}
fn input_to_rse_imm12<C: LowerCtx<Inst>>(
fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@@ -465,7 +445,7 @@ fn input_to_rse_imm12<C: LowerCtx<Inst>>(
ResultRSEImm12::from_rse(input_to_rse(ctx, input, narrow_mode))
}
fn input_to_rs_immlogic<C: LowerCtx<Inst>>(
fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@@ -484,7 +464,10 @@ fn input_to_rs_immlogic<C: LowerCtx<Inst>>(
ResultRSImmLogic::from_rs(input_to_rs(ctx, input, narrow_mode))
}
fn input_to_reg_immshift<C: LowerCtx<Inst>>(ctx: &mut C, input: InsnInput) -> ResultRegImmShift {
fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
) -> ResultRegImmShift {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
if let Some(imm_value) = output_to_const(ctx, out) {
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
@@ -577,7 +560,7 @@ fn alu_inst_immshift(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRegImmShif
// than an `InsnInput`, to do more introspection.
/// Lower the address of a load or store.
fn lower_address<C: LowerCtx<Inst>>(
fn lower_address<C: LowerCtx<I = Inst>>(
ctx: &mut C,
elem_ty: Type,
addends: &[InsnInput],
@@ -598,7 +581,7 @@ fn lower_address<C: LowerCtx<Inst>>(
if addends.len() == 2 && offset == 0 {
let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
return MemArg::reg_reg(ra, rb);
return MemArg::reg_plus_reg(ra, rb);
}
// Otherwise, generate add instructions.
@@ -621,17 +604,17 @@ fn lower_address<C: LowerCtx<Inst>>(
MemArg::reg(addr.to_reg())
}
fn lower_constant_u64<C: LowerCtx<Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u64) {
fn lower_constant_u64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u64) {
for inst in Inst::load_constant(rd, value) {
ctx.emit(inst);
}
}
fn lower_constant_f32<C: LowerCtx<Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f32) {
fn lower_constant_f32<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f32) {
ctx.emit(Inst::load_fp_constant32(rd, value));
}
fn lower_constant_f64<C: LowerCtx<Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f64) {
fn lower_constant_f64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f64) {
ctx.emit(Inst::load_fp_constant64(rd, value));
}
@@ -653,7 +636,7 @@ fn lower_condcode(cc: IntCC) -> Cond {
}
fn lower_fp_condcode(cc: FloatCC) -> Cond {
// Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` ARM64 docs.
// Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
// The FCMP instruction sets:
// NZCV
// - PCSR.NZCV = 0011 on UN (unordered),
@@ -717,7 +700,7 @@ pub fn condcode_is_signed(cc: IntCC) -> bool {
// Top-level instruction lowering entry point, for one instruction.
/// Actually codegen an instruction's results into registers.
fn lower_insn_to_regs<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst) {
fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
let op = ctx.data(insn).opcode();
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
.map(|i| InsnInput { insn, input: i })
@@ -1032,13 +1015,13 @@ fn lower_insn_to_regs<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst) {
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
let ty = ty.unwrap();
let is32 = ty_bits(ty) <= 32;
let narrow_mode = match (op, is32) {
let size = InstSize::from_bits(ty_bits(ty));
let narrow_mode = match (op, size) {
(Opcode::Ishl, _) => NarrowValueMode::None,
(Opcode::Ushr, false) => NarrowValueMode::ZeroExtend64,
(Opcode::Ushr, true) => NarrowValueMode::ZeroExtend32,
(Opcode::Sshr, false) => NarrowValueMode::SignExtend64,
(Opcode::Sshr, true) => NarrowValueMode::SignExtend32,
(Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
(Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32,
(Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64,
(Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
_ => unreachable!(),
};
let rd = output_to_reg(ctx, outputs[0]);
@@ -1160,7 +1143,7 @@ fn lower_insn_to_regs<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst) {
}
Opcode::Rotl => {
// ARM64 does not have a ROL instruction, so we always synthesize
// AArch64 does not have a ROL instruction, so we always synthesize
// this as:
//
// rotl rd, rn, rm
@@ -1854,26 +1837,17 @@ fn lower_insn_to_regs<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst) {
Opcode::Call => {
let extname = ctx.call_target(insn).unwrap();
let extname = extname.clone();
// HACK: get the function address with an Abs8 reloc in the constant pool.
//let tmp = ctx.tmp(RegClass::I64, I64);
//ctx.emit(Inst::LoadExtName {
//rd: tmp,
//name: extname,
//srcloc: loc,
//offset: 0,
//});
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(ARM64ABICall::from_func(sig, &extname, loc), &inputs[..])
//(ARM64ABICall::from_ptr(sig, tmp.to_reg(), loc), &inputs[..])
(AArch64ABICall::from_func(sig, &extname, loc), &inputs[..])
}
Opcode::CallIndirect => {
let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(ARM64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..])
(AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..])
}
_ => unreachable!(),
};
@@ -2357,21 +2331,6 @@ fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
}
}
fn branch_target(data: &InstructionData) -> Option<Block> {
match data {
&InstructionData::BranchIcmp { destination, .. }
| &InstructionData::Branch { destination, .. }
| &InstructionData::BranchInt { destination, .. }
| &InstructionData::Jump { destination, .. }
| &InstructionData::BranchTable { destination, .. }
| &InstructionData::BranchFloat { destination, .. } => Some(destination),
_ => {
assert!(!data.opcode().is_branch());
None
}
}
}
fn ldst_offset(data: &InstructionData) -> Option<i32> {
match data {
&InstructionData::Load { offset, .. }
@@ -2418,7 +2377,11 @@ fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
}
/// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so.
fn maybe_input_insn<C: LowerCtx<Inst>>(c: &mut C, input: InsnInput, op: Opcode) -> Option<IRInst> {
fn maybe_input_insn<C: LowerCtx<I = Inst>>(
c: &mut C,
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
if let InsnInputSource::Output(out) = input_source(c, input) {
let data = c.data(out.insn);
if data.opcode() == op {
@@ -2434,7 +2397,7 @@ fn maybe_input_insn<C: LowerCtx<Inst>>(c: &mut C, input: InsnInput, op: Opcode)
///
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
/// a bit more generic.
fn maybe_input_insn_via_conv<C: LowerCtx<Inst>>(
fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
c: &mut C,
input: InsnInput,
op: Opcode,
@@ -2461,7 +2424,7 @@ fn maybe_input_insn_via_conv<C: LowerCtx<Inst>>(
None
}
fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst, is_signed: bool) {
fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst, is_signed: bool) {
let ty = ctx.input_ty(insn, 0);
let bits = ty_bits(ty);
let narrow_mode = match (bits <= 32, is_signed) {
@@ -2488,7 +2451,7 @@ fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst, is
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}
fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst) {
fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
let ty = ctx.input_ty(insn, 0);
let bits = ty_bits(ty);
let inputs = [
@@ -2517,14 +2480,14 @@ fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRInst) {
//=============================================================================
// Lowering-backend trait implementation.
impl LowerBackend for Arm64Backend {
impl LowerBackend for AArch64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
lower_insn_to_regs(ctx, ir_inst);
}
fn lower_branch_group<C: LowerCtx<Inst>>(
fn lower_branch_group<C: LowerCtx<I = Inst>>(
&self,
ctx: &mut C,
branches: &[IRInst],
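
A note on the narrow-mode selection in the shift lowering above: the reason `ushr` pairs with zero-extension and `sshr` with sign-extension for narrow operands can be checked in isolation. The sketch below is a standalone illustration of that invariant, not the backend's code.

```rust
// Why the lowering pairs Ushr with ZeroExtend and Sshr with SignExtend:
// a narrow (e.g. i8) value must be widened with the matching extension
// before a full-width shift gives the same result as an 8-bit shift.
fn main() {
    let v: i8 = -2; // bit pattern 0xFE
    // Logical shift right: widen with zero-extension, then shift.
    let ushr = (u32::from(v as u8) >> 1) as u8;
    // Arithmetic shift right: widen with sign-extension, then shift.
    let sshr = (i32::from(v) >> 1) as i8;
    assert_eq!(ushr, 0x7F);
    assert_eq!(sshr, -1);
    println!("ushr = {:#04x}, sshr = {}", ushr, sshr);
}
```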

View File

@@ -2,7 +2,6 @@
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::isa::TargetIsa;
use crate::machinst::{
compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode,
};
@@ -10,10 +9,9 @@ use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use std::str::FromStr;
use regalloc::RealRegUniverse;
use target_lexicon::Triple;
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
// New backend:
mod abi;
@@ -22,29 +20,30 @@ mod lower;
use inst::create_reg_universe;
/// An ARM64 backend.
pub struct Arm64Backend {
/// An AArch64 backend.
pub struct AArch64Backend {
triple: Triple,
flags: settings::Flags,
}
impl Arm64Backend {
/// Create a new ARM64 backend with the given (shared) flags.
pub fn new_with_flags(flags: settings::Flags) -> Arm64Backend {
Arm64Backend { flags }
impl AArch64Backend {
/// Create a new AArch64 backend with the given (shared) flags.
pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
AArch64Backend { triple, flags }
}
fn compile_vcode(&self, mut func: Function, flags: &settings::Flags) -> VCode<inst::Inst> {
fn compile_vcode(&self, func: &Function, flags: &settings::Flags) -> VCode<inst::Inst> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::ARM64ABIBody::new(&func));
compile::compile::<Arm64Backend>(&mut func, self, abi, flags)
let abi = Box::new(abi::AArch64ABIBody::new(func));
compile::compile::<AArch64Backend>(func, self, abi, flags)
}
}
impl MachBackend for Arm64Backend {
impl MachBackend for AArch64Backend {
fn compile_function(
&self,
func: Function,
func: &Function,
want_disasm: bool,
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
@@ -66,11 +65,11 @@ impl MachBackend for Arm64Backend {
}
fn name(&self) -> &'static str {
"arm64"
"aarch64"
}
fn triple(&self) -> Triple {
FromStr::from_str("arm64").unwrap()
self.triple.clone()
}
fn flags(&self) -> &settings::Flags {
@@ -84,32 +83,28 @@ impl MachBackend for Arm64Backend {
/// Create a new `isa::Builder`.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
_: Triple,
shared_flags: settings::Flags,
_arch_flag_builder: settings::Builder,
) -> Box<dyn TargetIsa> {
let backend = Arm64Backend::new_with_flags(shared_flags);
constructor: |triple, shared_flags, _| {
let backend = AArch64Backend::new_with_flags(triple, shared_flags);
Box::new(TargetIsaAdapter::new(backend))
},
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::binemit::{NullRelocSink, NullStackmapSink, NullTrapSink};
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
use crate::settings;
use crate::settings::Configurable;
use core::str::FromStr;
use target_lexicon::Triple;
#[test]
fn test_compile_function() {
@@ -130,8 +125,11 @@ mod test {
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let backend = Arm64Backend::new_with_flags(settings::Flags::new(shared_flags));
let sections = backend.compile_function(func, false).unwrap().sections;
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let sections = backend.compile_function(&mut func, false).unwrap().sections;
let code = &sections.sections[0].data;
// stp x29, x30, [sp, #-16]!
@@ -182,9 +180,12 @@ mod test {
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let backend = Arm64Backend::new_with_flags(settings::Flags::new(shared_flags));
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let result = backend
.compile_function(func, /* want_disasm = */ false)
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = &result.sections.sections[0].data;

View File

@@ -84,7 +84,7 @@ pub mod fde;
mod arm32;
#[cfg(feature = "arm64")]
mod arm64;
mod aarch64;
mod call_conv;
mod constraints;
@@ -93,6 +93,9 @@ mod encoding;
pub mod registers;
mod stack;
#[cfg(test)]
mod test_utils;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::SupportDisabled)` if not enabled.
macro_rules! isa_builder {
@@ -117,7 +120,7 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
isa_builder!(x86, "x86", triple)
}
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
Architecture::Aarch64 { .. } => isa_builder!(arm64, "arm64", triple),
Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
_ => Err(LookupError::Unsupported),
}
}
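
With the `Aarch64` arm now routed to the new module, the backend is reached through the ordinary ISA lookup path. A minimal usage sketch, assuming `cranelift-codegen` and `target-lexicon` as dependencies (the `name()` call on the returned `TargetIsa` is an assumption about the surrounding API):

```rust
use cranelift_codegen::isa::lookup;
use cranelift_codegen::settings;
use std::str::FromStr;
use target_lexicon::Triple;

fn main() {
    // Look up the builder for an AArch64 triple and finish it with default flags.
    let triple = Triple::from_str("aarch64").expect("valid triple");
    let isa = lookup(triple)
        .expect("aarch64 backend enabled")
        .finish(settings::Flags::new(settings::builder()));
    println!("selected ISA: {}", isa.name());
}
```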

View File

@@ -1,10 +1,13 @@
// This is unused when no platforms with the new backend are enabled.
#![allow(dead_code)]
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::Value;
use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode};
use crate::isa::TargetIsa;
use alloc::vec::Vec;
use std::string::{String, ToString};
use std::string::String;
pub struct TestCodeSink {
bytes: Vec<u8>,
@@ -16,11 +19,13 @@ impl TestCodeSink {
TestCodeSink { bytes: vec![] }
}
/// This is pretty lame, but whatever ..
/// Return the code emitted to this sink as a hex string.
pub fn stringify(&self) -> String {
let mut s = "".to_string();
// This is pretty lame, but whatever ..
use std::fmt::Write;
let mut s = String::with_capacity(self.bytes.len() * 2);
for b in &self.bytes {
s = s + &format!("{:02X}", b).to_string();
write!(&mut s, "{:02X}", b).unwrap();
}
s
}
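
The `fmt::Write` pattern used in `stringify` above is worth noting on its own: it appends into a pre-sized `String` rather than reallocating through repeated concatenation. A standalone sketch:

```rust
use std::fmt::Write;

// Build a hex dump by writing into a pre-sized String.
fn to_hex(bytes: &[u8]) -> String {
    let mut s = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        write!(&mut s, "{:02X}", b).unwrap();
    }
    s
}

fn main() {
    assert_eq!(to_hex(&[0xDE, 0xAD, 0xBE, 0xEF]), "DEADBEEF");
}
```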

View File

@@ -87,6 +87,7 @@ mod context;
mod dce;
mod divconst_magic_numbers;
mod fx;
mod inst_predicates;
mod iterators;
mod legalizer;
mod licm;

View File

@@ -1,15 +1,17 @@
//! ABI definitions.
use crate::ir;
use crate::ir::StackSlot;
use crate::machinst::*;
use crate::settings;
use regalloc::{Reg, Set, SpillSlot, VirtualReg, Writable};
use regalloc::{Reg, Set, SpillSlot, Writable};
/// Trait implemented by an object that tracks ABI-related state (e.g., stack
/// layout) and can generate code while emitting the *body* of a function.
pub trait ABIBody<I: VCodeInst> {
pub trait ABIBody {
/// The instruction type for the ISA associated with this ABI.
type I: VCodeInst;
/// Get the liveins of the function.
fn liveins(&self) -> Set<RealReg>;
@@ -27,17 +29,19 @@ pub trait ABIBody<I: VCodeInst> {
/// Generate an instruction which copies an argument to a destination
/// register.
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
/// Generate an instruction which copies a source register to a return
/// value slot.
fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> I;
fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> Self::I;
/// Generate a return instruction.
fn gen_ret(&self) -> I;
fn gen_ret(&self) -> Self::I;
/// Generate an epilogue placeholder.
fn gen_epilogue_placeholder(&self) -> I;
/// Generate an epilogue placeholder. The returned instruction should return `true` from
/// `is_epilogue_placeholder()`; this is used to indicate to the lowering driver when
/// the epilogue should be inserted.
fn gen_epilogue_placeholder(&self) -> Self::I;
// -----------------------------------------------------------------
// Every function above this line may only be called pre-regalloc.
@@ -56,32 +60,32 @@ pub trait ABIBody<I: VCodeInst> {
fn load_stackslot(
&self,
slot: StackSlot,
offset: usize,
offset: u32,
ty: Type,
into_reg: Writable<Reg>,
) -> I;
) -> Self::I;
/// Store to a stackslot.
fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> I;
fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Self::I;
/// Load from a spillslot.
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> I;
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Self::I;
/// Store to a spillslot.
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> I;
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I;
/// Generate a prologue, post-regalloc. This should include any stack
/// frame or other setup necessary to use the other methods (`load_arg`,
/// `store_retval`, and spillslot accesses.) |self| is mutable so that we
/// `store_retval`, and spillslot accesses.) `self` is mutable so that we
/// can store information in it which will be useful when creating the
/// epilogue.
fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<I>;
fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<Self::I>;
/// Generate an epilogue, post-regalloc. Note that this must generate the
/// actual return instruction (rather than emitting this in the lowering
/// logic), because the epilogue code comes before the return and the two are
/// likely closely related.
fn gen_epilogue(&self, flags: &settings::Flags) -> Vec<I>;
fn gen_epilogue(&self, flags: &settings::Flags) -> Vec<Self::I>;
/// Returns the full frame size for the given function, after prologue emission has run. This
/// comprises the spill space, incoming argument space, alignment padding, etc.
@@ -91,10 +95,10 @@ pub trait ABIBody<I: VCodeInst> {
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;
/// Generate a spill.
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> I;
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Self::I;
/// Generate a reload (fill).
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> I;
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Self::I;
}
/// Trait implemented by an object that tracks ABI-related state and can
@@ -111,22 +115,25 @@ pub trait ABIBody<I: VCodeInst> {
/// and retval copies, and attach the register use/def info to the call.
///
/// This trait is thus provided for convenience to the backends.
pub trait ABICall<I: VCodeInst> {
pub trait ABICall {
/// The instruction type for the ISA associated with this ABI.
type I: VCodeInst;
/// Get the number of arguments expected.
fn num_args(&self) -> usize;
/// Save the clobbered registers.
/// Copy an argument value from a source register, prior to the call.
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> I;
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Self::I;
/// Copy a return value into a destination register, after the call returns.
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
/// Pre-adjust the stack, prior to argument copies and call.
fn gen_stack_pre_adjust(&self) -> Vec<I>;
fn gen_stack_pre_adjust(&self) -> Vec<Self::I>;
/// Post-adjust the stack, after call return and return-value copies.
fn gen_stack_post_adjust(&self) -> Vec<I>;
fn gen_stack_post_adjust(&self) -> Vec<Self::I>;
/// Generate the call itself.
///
@@ -138,5 +145,5 @@ pub trait ABICall<I: VCodeInst> {
/// registers are also logically defs, but should never be read; their
/// values are "defined" (to the regalloc) but "undefined" in every other
/// sense.)
fn gen_call(&self) -> Vec<I>;
fn gen_call(&self) -> Vec<Self::I>;
}
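
The move from a generic parameter (`ABIBody<I>`) to an associated type (`ABIBody` with `type I`) changes how trait objects are spelled: `Box<dyn ABIBody<I = Inst>>` instead of `Box<dyn ABIBody<Inst>>`. A self-contained miniature of the pattern, using stand-in types rather than Cranelift's:

```rust
// Stand-in trait: the instruction type is an associated type, not a parameter.
trait AbiBody {
    type I;
    fn gen_ret(&self) -> Self::I;
}

#[derive(Debug)]
struct MyInst(&'static str);

struct MyAbi;

impl AbiBody for MyAbi {
    type I = MyInst;
    fn gen_ret(&self) -> MyInst {
        MyInst("ret")
    }
}

// Trait objects now name the instruction type explicitly: `dyn AbiBody<I = MyInst>`.
fn finish(abi: Box<dyn AbiBody<I = MyInst>>) -> MyInst {
    abi.gen_ret()
}

fn main() {
    println!("{:?}", finish(Box::new(MyAbi)));
}
```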

View File

@@ -4,9 +4,12 @@ use crate::binemit;
use crate::ir;
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
use crate::machinst::*;
use crate::regalloc::{RegDiversions, RegisterSet};
use crate::regalloc::RegisterSet;
use crate::settings::Flags;
#[cfg(feature = "testing_hooks")]
use crate::regalloc::RegDiversions;
use std::borrow::Cow;
use std::fmt;
use target_lexicon::Triple;
@@ -30,7 +33,11 @@ impl TargetIsaAdapter {
impl fmt::Display for TargetIsaAdapter {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "MachBackend")
f.debug_struct("MachBackend")
.field("name", &self.backend.name())
.field("triple", &self.backend.triple())
.field("flags", &format!("{}", self.backend.flags()))
.finish()
}
}
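
The `Formatter::debug_struct` builder used here is plain `std::fmt` API; a tiny standalone example with stand-in fields:

```rust
use std::fmt;

struct Backend {
    name: &'static str,
    triple: &'static str,
}

impl fmt::Display for Backend {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Prints e.g. `MachBackend { name: "aarch64", triple: "aarch64-unknown" }`.
        f.debug_struct("MachBackend")
            .field("name", &self.name)
            .field("triple", &self.triple)
            .finish()
    }
}

fn main() {
    println!("{}", Backend { name: "aarch64", triple: "aarch64-unknown" });
}
```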

View File

@@ -1,6 +1,7 @@
//! Computation of basic block order in emitted code.
use crate::machinst::*;
use regalloc::{BlockIx, Function};
/// Simple reverse postorder-based block order emission.
///
@@ -29,9 +30,8 @@ impl BlockRPO {
}
}
let (start, end) = &vcode.block_ranges[block as usize];
for i in *start..*end {
if vcode.insts[i as usize].is_epilogue_placeholder() {
for i in vcode.block_insns(BlockIx::new(block)) {
if vcode.get_insn(i).is_epilogue_placeholder() {
debug_assert!(self.deferred_last.is_none());
self.deferred_last = Some(block);
return;

View File

@@ -7,14 +7,13 @@ use crate::timing;
use log::debug;
use regalloc::{allocate_registers, RegAllocAlgorithm};
use std::env;
/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
pub fn compile<B: LowerBackend>(
f: &mut Function,
f: &Function,
b: &B,
abi: Box<dyn ABIBody<B::MInst>>,
abi: Box<dyn ABIBody<I = B::MInst>>,
flags: &settings::Flags,
) -> VCode<B::MInst>
where
@@ -28,18 +27,8 @@ where
debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));
// Perform register allocation.
let algorithm = match env::var("REGALLOC") {
Ok(str) => match str.as_str() {
"lsrac" => RegAllocAlgorithm::LinearScanChecked,
"lsra" => RegAllocAlgorithm::LinearScan,
// to wit: btc doesn't mean "bitcoin" here
"btc" => RegAllocAlgorithm::BacktrackingChecked,
_ => RegAllocAlgorithm::Backtracking,
},
// By default use backtracking, which is the fastest.
Err(_) => RegAllocAlgorithm::Backtracking,
};
// TODO: select register allocation algorithm from flags.
let algorithm = RegAllocAlgorithm::Backtracking;
let result = {
let _tt = timing::regalloc();
allocate_registers(
@@ -70,7 +59,5 @@ where
vcode.show_rru(Some(universe))
);
//println!("{}\n", vcode.show_rru(Some(&B::MInst::reg_universe())));
vcode
}

View File

@@ -2,39 +2,37 @@
//! to machine instructions with virtual registers. This is *almost* the final
//! machine code, except for register allocation.
use crate::binemit::CodeSink;
use crate::dce::has_side_effect;
use crate::entity::SecondaryMap;
use crate::inst_predicates::has_side_effect;
use crate::ir::instructions::BranchInfo;
use crate::ir::{
Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode,
Signature, SourceLoc, Type, Value, ValueDef,
};
use crate::isa::registers::RegUnit;
use crate::machinst::{
ABIBody, BlockIndex, MachInst, MachInstEmit, VCode, VCodeBuilder, VCodeInst,
};
use crate::machinst::{ABIBody, BlockIndex, VCode, VCodeBuilder, VCodeInst};
use crate::num_uses::NumUses;
use regalloc::Function as RegallocFunction;
use regalloc::{RealReg, Reg, RegClass, Set, VirtualReg, Writable};
use regalloc::{Reg, RegClass, Set, VirtualReg, Writable};
use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use std::collections::VecDeque;
use std::ops::Range;
/// A context that machine-specific lowering code can use to emit lowered instructions. This is the
/// view of the machine-independent per-function lowering context that is seen by the machine
/// backend.
pub trait LowerCtx<I> {
pub trait LowerCtx {
/// The instruction type for which this lowering framework is instantiated.
type I;
/// Get the instdata for a given IR instruction.
fn data(&self, ir_inst: Inst) -> &InstructionData;
/// Get the controlling type for a polymorphic IR instruction.
fn ty(&self, ir_inst: Inst) -> Type;
/// Emit a machine instruction.
fn emit(&mut self, mach_inst: I);
fn emit(&mut self, mach_inst: Self::I);
/// Indicate that an IR instruction has been merged, and so one of its
/// uses is gone (replaced by uses of the instruction's inputs). This
/// helps the lowering algorithm to perform on-the-fly DCE, skipping over
@@ -87,11 +85,11 @@ pub trait LowerBackend {
/// Lower a single instruction. Instructions are lowered in reverse order.
/// This function need not handle branches; those are always passed to
/// `lower_branch_group` below.
fn lower<C: LowerCtx<Self::MInst>>(&self, ctx: &mut C, inst: Inst);
fn lower<C: LowerCtx<I = Self::MInst>>(&self, ctx: &mut C, inst: Inst);
/// Lower a block-terminating group of branches (which together can be seen as one
/// N-way branch), given a vcode BlockIndex for each target.
fn lower_branch_group<C: LowerCtx<Self::MInst>>(
fn lower_branch_group<C: LowerCtx<I = Self::MInst>>(
&self,
ctx: &mut C,
insts: &[Inst],
@@ -103,22 +101,22 @@ pub trait LowerBackend {
/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'a, I: VCodeInst> {
// The function to lower.
/// The function to lower.
f: &'a Function,
// Lowered machine instructions.
/// Lowered machine instructions.
vcode: VCodeBuilder<I>,
// Number of active uses (minus `dec_use()` calls by backend) of each instruction.
/// Number of active uses (minus `dec_use()` calls by backend) of each instruction.
num_uses: SecondaryMap<Inst, u32>,
// Mapping from `Value` (SSA value in IR) to virtual register.
/// Mapping from `Value` (SSA value in IR) to virtual register.
value_regs: SecondaryMap<Value, Reg>,
// Return-value vregs.
/// Return-value vregs.
retval_regs: Vec<Reg>,
// Next virtual register number to allocate.
/// Next virtual register number to allocate.
next_vreg: u32,
}
@@ -144,7 +142,7 @@ enum GenerateReturn {
impl<'a, I: VCodeInst> Lower<'a, I> {
/// Prepare a new lowering context for the given IR function.
pub fn new(f: &'a Function, abi: Box<dyn ABIBody<I>>) -> Lower<'a, I> {
pub fn new(f: &'a Function, abi: Box<dyn ABIBody<I = I>>) -> Lower<'a, I> {
let mut vcode = VCodeBuilder::new(abi);
let num_uses = NumUses::compute(f).take_uses();
@@ -244,7 +242,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> {
let mut succs: SmallVec<[Block; 16]> = SmallVec::new();
for inst in self.f.layout.block_insts(b) {
if self.f.dfg[inst].opcode().is_branch() {
succs.extend(branch_targets(self.f, b, inst).into_iter());
visit_branch_targets(self.f, b, inst, |succ| {
succs.push(succ);
});
}
}
for succ in succs.into_iter() {
@@ -264,17 +264,14 @@ impl<'a, I: VCodeInst> Lower<'a, I> {
/// Lower the function.
pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> VCode<I> {
// Find all reachable blocks.
let mut bbs = self.find_reachable_bbs();
// Work backward (reverse block order, reverse through each block), skipping insns with zero
// uses.
bbs.reverse();
let bbs = self.find_reachable_bbs();
// This records a Block-to-BlockIndex map so that branch targets can be resolved.
let mut next_bindex = self.vcode.init_bb_map(&bbs[..]);
// Allocate a separate BlockIndex for each control-flow instruction so that we can create
// the edge blocks later. Each entry for a control-flow inst is the edge block; the list
// has (cf-inst, edge block, orig block) tuples.
// has (control flow inst, edge block, orig block) tuples.
let mut edge_blocks_by_inst: SecondaryMap<Inst, Vec<BlockIndex>> =
SecondaryMap::with_default(vec![]);
let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![];
@@ -282,7 +279,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> {
debug!("about to lower function: {:?}", self.f);
debug!("bb map: {:?}", self.vcode.blocks_by_bb());
for bb in bbs.iter() {
// Work backward (reverse block order, reverse through each block), skipping insns with zero
// uses.
for bb in bbs.iter().rev() {
for inst in self.f.layout.block_insts(*bb) {
let op = self.f.dfg[inst].opcode();
if op.is_branch() {
@@ -293,9 +292,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> {
edge_blocks_by_inst[inst].push(edge_block);
edge_blocks.push((inst, edge_block, next_bb));
};
for succ in branch_targets(self.f, *bb, inst).into_iter() {
visit_branch_targets(self.f, *bb, inst, |succ| {
add_succ(succ);
}
});
}
}
}
@@ -303,7 +302,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> {
for bb in bbs.iter() {
debug!("lowering bb: {}", bb);
// If this is a return block, produce the return value setup.
// If this is a return block, produce the return value setup. N.B.: this comes
// *before* the lowering below because it must occur *after* any other
// instructions, and instructions are lowered in reverse order.
let last_insn = self.f.layout.block_insts(*bb).last().unwrap();
let last_insn_opcode = self.f.dfg[last_insn].opcode();
if last_insn_opcode.is_return() {
@@ -513,7 +514,9 @@ impl<'a, I: VCodeInst> Lower<'a, I> {
}
}
impl<'a, I: VCodeInst> LowerCtx<I> for Lower<'a, I> {
impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> {
type I = I;
/// Get the instdata for a given IR instruction.
fn data(&self, ir_inst: Inst) -> &InstructionData {
&self.f.dfg[ir_inst]
@@ -695,29 +698,23 @@ impl<'a, I: VCodeInst> LowerCtx<I> for Lower<'a, I> {
}
}
fn branch_targets(f: &Function, block: Block, inst: Inst) -> SmallVec<[Block; 16]> {
let mut ret = SmallVec::new();
fn visit_branch_targets<F: FnMut(Block)>(f: &Function, block: Block, inst: Inst, mut visit: F) {
if f.dfg[inst].opcode() == Opcode::Fallthrough {
ret.push(f.layout.next_block(block).unwrap());
visit(f.layout.next_block(block).unwrap());
} else {
match &f.dfg[inst] {
&InstructionData::Jump { destination, .. }
| &InstructionData::Branch { destination, .. }
| &InstructionData::BranchInt { destination, .. }
| &InstructionData::BranchIcmp { destination, .. }
| &InstructionData::BranchFloat { destination, .. } => {
ret.push(destination);
match f.dfg[inst].analyze_branch(&f.dfg.value_lists) {
BranchInfo::NotABranch => {}
BranchInfo::SingleDest(dest, _) => {
visit(dest);
}
&InstructionData::BranchTable {
destination, table, ..
} => {
ret.push(destination);
for dest in f.jump_tables[table].as_slice() {
ret.push(*dest);
BranchInfo::Table(table, maybe_dest) => {
if let Some(dest) = maybe_dest {
visit(dest);
}
for &dest in f.jump_tables[table].as_slice() {
visit(dest);
}
}
_ => {}
}
}
ret
}
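
Replacing the returned `SmallVec` with a visitor callback avoids the temporary collection and its fixed inline capacity. A self-contained sketch of the shape of `visit_branch_targets`, using stand-in types rather than Cranelift's `BranchInfo`:

```rust
// Stand-in for the branch-analysis result: not a branch, a single destination,
// or a jump table with an optional default destination.
enum BranchShape {
    NotABranch,
    SingleDest(u32),
    Table(Vec<u32>, Option<u32>),
}

// Visit every possible successor, calling `visit` once per target.
fn visit_targets<F: FnMut(u32)>(shape: &BranchShape, mut visit: F) {
    match shape {
        BranchShape::NotABranch => {}
        BranchShape::SingleDest(dest) => visit(*dest),
        BranchShape::Table(table, default) => {
            if let Some(dest) = default {
                visit(*dest);
            }
            for &dest in table {
                visit(dest);
            }
        }
    }
}

fn main() {
    let mut succs = Vec::new();
    visit_targets(&BranchShape::Table(vec![1, 2, 3], Some(0)), |b| succs.push(b));
    assert_eq!(succs, vec![0, 1, 2, 3]);
}
```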

View File

@@ -17,105 +17,97 @@
//! (N.B.: though we show the VCode separately at each stage, the passes
//! mutate the VCode in place; these are not separate copies of the code.)
//!
//! | ir::Function (SSA IR, machine-independent opcodes)
//! | |
//! | | [lower]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - mostly virtual registers.
//! | | - cond branches in two-target form.
//! | | - branch targets are block indices.
//! | | - in-memory constants held by insns,
//! | | with unknown offsets.
//! | | - critical edges (actually all edges)
//! | | are split.)
//! | | [regalloc]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all real registers.
//! | | - new instruction sequence returned
//! | | out-of-band in RegAllocResult.
//! | | - instruction sequence has spills,
//! | | reloads, and moves inserted.
//! | | - other invariants same as above.)
//! | |
//! | | [preamble/postamble]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - stack-frame size known.
//! | | - out-of-band instruction sequence
//! | | has preamble prepended to entry
//! | | block, and postamble injected before
//! | | every return instruction.
//! | | - all symbolic stack references to
//! | | stackslots and spillslots are resolved
//! | | to concrete FP-offset mem addresses.)
//! | | [block/insn ordering]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - vcode.final_block_order is filled in.
//! | | - new insn sequence from regalloc is
//! | | placed back into vcode and block
//! | | boundaries are updated.)
//! | | [redundant branch/block
//! | | removal]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all blocks that were just an
//! | | unconditional branch are removed.)
//! | |
//! | | [branch finalization
//! | | (fallthroughs)]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all branches are in lowered one-
//! | | target form, but targets are still
//! | | block indices.)
//! | |
//! | | [branch finalization
//! | | (offsets)]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all branch offsets from start of
//! | | function are known, and all branches
//! | | have resolved-offset targets.)
//! | |
//! | | [MemArg finalization]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all MemArg references to the constant
//! | | pool are replaced with offsets.
//! | | - all constant-pool data is collected
//! | | in the VCode.)
//! | |
//! | | [binary emission]
//! | |
//! | Vec<u8> (machine code!)
//! ```plain
//!
//! ir::Function (SSA IR, machine-independent opcodes)
//! |
//! | [lower]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - mostly virtual registers.
//! | - cond branches in two-target form.
//! | - branch targets are block indices.
//! | - in-memory constants held by insns,
//! | with unknown offsets.
//! | - critical edges (actually all edges)
//! | are split.)
//! | [regalloc]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - all real registers.
//! | - new instruction sequence returned
//! | out-of-band in RegAllocResult.
//! | - instruction sequence has spills,
//! | reloads, and moves inserted.
//! | - other invariants same as above.)
//! |
//! | [preamble/postamble]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - stack-frame size known.
//! | - out-of-band instruction sequence
//! | has preamble prepended to entry
//! | block, and postamble injected before
//! | every return instruction.
//! | - all symbolic stack references to
//! | stackslots and spillslots are resolved
//! | to concrete FP-offset mem addresses.)
//! | [block/insn ordering]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - vcode.final_block_order is filled in.
//! | - new insn sequence from regalloc is
//! | placed back into vcode and block
//! | boundaries are updated.)
//! | [redundant branch/block
//! | removal]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - all blocks that were just an
//! | unconditional branch are removed.)
//! |
//! | [branch finalization
//! | (fallthroughs)]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - all branches are in lowered one-
//! | target form, but targets are still
//! | block indices.)
//! |
//! | [branch finalization
//! | (offsets)]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - all branch offsets from start of
//! | function are known, and all branches
//! | have resolved-offset targets.)
//! |
//! | [MemArg finalization]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - all MemArg references to the constant
//! | pool are replaced with offsets.
//! | - all constant-pool data is collected
//! | in the VCode.)
//! |
//! | [binary emission]
//! |
//! Vec<u8> (machine code!)
//!
//! ```
#![allow(unused_imports)]
use crate::binemit::{
CodeInfo, CodeOffset, CodeSink, MemoryCodeSink, RelocSink, StackmapSink, TrapSink,
};
use crate::entity::EntityRef;
use crate::binemit::{CodeInfo, CodeOffset};
use crate::entity::SecondaryMap;
use crate::ir::condcodes::IntCC;
use crate::ir::ValueLocations;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode, Type, Value};
use crate::isa::RegUnit;
use crate::ir::{Function, Type};
use crate::result::CodegenResult;
use crate::settings::Flags;
use crate::HashMap;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::fmt::Debug;
use core::iter::Sum;
use regalloc::Map as RegallocMap;
use regalloc::RegUsageCollector;
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use smallvec::SmallVec;
use std::hash::Hash;
use std::string::String;
use target_lexicon::Triple;
@@ -129,8 +121,8 @@ pub mod blockorder;
pub use blockorder::*;
pub mod abi;
pub use abi::*;
pub mod pp;
pub use pp::*;
pub mod pretty_print;
pub use pretty_print::*;
pub mod sections;
pub use sections::*;
pub mod adapter;
@@ -255,10 +247,10 @@ impl MachCompileResult {
/// Top-level machine backend trait, which wraps all monomorphized code and
/// allows a virtual call from the machine-independent `Function::compile()`.
pub trait MachBackend {
/// Compile the given function. Consumes the function.
/// Compile the given function.
fn compile_function(
&self,
func: Function,
func: &Function,
want_disasm: bool,
) -> CodegenResult<MachCompileResult>;

View File

@@ -3,7 +3,7 @@
//! simultaneously, so we buffer the result in memory and hand off to the
//! caller at the end of compilation.
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, RelocSink, StackmapSink, TrapSink};
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
use alloc::vec::Vec;
@@ -104,28 +104,31 @@ pub trait MachSectionOutput {
/// Add 2 bytes to the section.
fn put2(&mut self, value: u16) {
self.put1((value & 0xff) as u8);
self.put1(((value >> 8) & 0xff) as u8);
let [b0, b1] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
}
/// Add 4 bytes to the section.
fn put4(&mut self, value: u32) {
self.put1((value & 0xff) as u8);
self.put1(((value >> 8) & 0xff) as u8);
self.put1(((value >> 16) & 0xff) as u8);
self.put1(((value >> 24) & 0xff) as u8);
let [b0, b1, b2, b3] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
self.put1(b2);
self.put1(b3);
}
/// Add 8 bytes to the section.
fn put8(&mut self, value: u64) {
self.put1((value & 0xff) as u8);
self.put1(((value >> 8) & 0xff) as u8);
self.put1(((value >> 16) & 0xff) as u8);
self.put1(((value >> 24) & 0xff) as u8);
self.put1(((value >> 32) & 0xff) as u8);
self.put1(((value >> 40) & 0xff) as u8);
self.put1(((value >> 48) & 0xff) as u8);
self.put1(((value >> 56) & 0xff) as u8);
let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
self.put1(b2);
self.put1(b3);
self.put1(b4);
self.put1(b5);
self.put1(b6);
self.put1(b7);
}
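
`to_le_bytes` produces exactly the byte order the old shift-and-mask code emitted; a quick standalone check:

```rust
fn main() {
    let value: u32 = 0x1234_5678;
    // The byte order the previous shift-and-mask code produced...
    let manual = [
        (value & 0xff) as u8,
        ((value >> 8) & 0xff) as u8,
        ((value >> 16) & 0xff) as u8,
        ((value >> 24) & 0xff) as u8,
    ];
    // ...is the little-endian order returned by `to_le_bytes`.
    assert_eq!(manual, value.to_le_bytes());
    println!("{:02x?}", value.to_le_bytes()); // [78, 56, 34, 12]
}
```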
/// Add a slice of bytes to the section.

View File

@@ -17,7 +17,6 @@
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
use crate::binemit::Reloc;
use crate::ir;
use crate::machinst::*;
use crate::settings;
@@ -32,7 +31,6 @@ use log::debug;
use smallvec::SmallVec;
use std::fmt;
use std::iter;
use std::ops::Index;
use std::string::String;
/// Index referring to an instruction in VCode.
@@ -59,13 +57,13 @@ pub struct VCode<I: VCodeInst> {
vreg_types: Vec<Type>,
/// Lowered machine instructions in order corresponding to the original IR.
pub insts: Vec<I>,
insts: Vec<I>,
/// Entry block.
entry: BlockIndex,
/// Block instruction indices.
pub block_ranges: Vec<(InsnIndex, InsnIndex)>,
block_ranges: Vec<(InsnIndex, InsnIndex)>,
/// Block successors: index range in the successor-list below.
block_succ_range: Vec<(usize, usize)>,
@@ -94,7 +92,7 @@ pub struct VCode<I: VCodeInst> {
code_size: CodeOffset,
/// ABI object.
abi: Box<dyn ABIBody<I>>,
abi: Box<dyn ABIBody<I = I>>,
}
/// A builder for a VCode function body. This builder is designed for the
@@ -128,7 +126,7 @@ pub struct VCodeBuilder<I: VCodeInst> {
impl<I: VCodeInst> VCodeBuilder<I> {
/// Create a new VCodeBuilder.
pub fn new(abi: Box<dyn ABIBody<I>>) -> VCodeBuilder<I> {
pub fn new(abi: Box<dyn ABIBody<I = I>>) -> VCodeBuilder<I> {
let vcode = VCode::new(abi);
VCodeBuilder {
vcode,
@@ -139,7 +137,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
}
/// Access the ABI object.
pub fn abi(&mut self) -> &mut dyn ABIBody<I> {
pub fn abi(&mut self) -> &mut dyn ABIBody<I = I> {
&mut *self.vcode.abi
}
@@ -282,7 +280,7 @@ fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> O
impl<I: VCodeInst> VCode<I> {
/// New empty VCode.
fn new(abi: Box<dyn ABIBody<I>>) -> VCode<I> {
fn new(abi: Box<dyn ABIBody<I = I>>) -> VCode<I> {
VCode {
liveins: abi.liveins(),
liveouts: abi.liveouts(),
@@ -472,10 +470,10 @@ impl<I: VCodeInst> VCode<I> {
// Compute block offsets.
let mut code_section = MachSectionSize::new(0);
let mut block_offsets = vec![0; self.num_blocks()];
for block in &self.final_block_order {
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
block_offsets[*block as usize] = code_section.offset;
let (start, end) = self.block_ranges[*block as usize];
block_offsets[block as usize] = code_section.offset;
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize].emit(&mut code_section);
}
@@ -490,9 +488,9 @@ impl<I: VCodeInst> VCode<I> {
// it (so forward references are now possible), and (ii) mutates the
// instructions.
let mut code_section = MachSectionSize::new(0);
for block in &self.final_block_order {
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
let (start, end) = self.block_ranges[*block as usize];
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize]
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
@@ -510,7 +508,7 @@ impl<I: VCodeInst> VCode<I> {
let code_idx = sections.add_section(0, self.code_size);
let code_section = sections.get_section(code_idx);
for block in &self.final_block_order {
for &block in &self.final_block_order {
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
while new_offset > code_section.cur_offset_from_start() {
// Pad with NOPs up to the aligned block offset.
@@ -519,7 +517,7 @@ impl<I: VCodeInst> VCode<I> {
}
assert_eq!(code_section.cur_offset_from_start(), new_offset);
let (start, end) = self.block_ranges[*block as usize];
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize].emit(code_section);
}
@@ -639,9 +637,6 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
}
}
// N.B.: Debug impl assumes that VCode has already been through all compilation
// passes, and so has a final block order and offsets.
impl<I: VCodeInst> fmt::Debug for VCode<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "VCode_Debug {{")?;
@@ -665,22 +660,21 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
}
}
// Pretty-printing with `RealRegUniverse` context.
/// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
use crate::alloc::string::ToString;
use std::fmt::Write;
// Calculate an order in which to display the blocks. This is the same
// as final_block_order, but also includes blocks which are in the
// representation but not in final_block_order.
let mut display_order = Vec::<usize>::new();
// First display blocks in |final_block_order|
// First display blocks in `final_block_order`
for bix in &self.final_block_order {
assert!((*bix as usize) < self.num_blocks());
display_order.push(*bix as usize);
}
// Now also take care of those not listed in |final_block_order|.
// Now also take care of those not listed in `final_block_order`.
// This is quadratic, but it's also debug-only code.
for bix in 0..self.num_blocks() {
if display_order.contains(&bix) {
@@ -690,48 +684,46 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
}
let mut s = String::new();
s = s + &format!("VCode_ShowWithRRU {{{{");
s = s + &"\n".to_string();
s = s + &format!(" Entry block: {}", self.entry);
s = s + &"\n".to_string();
s = s + &format!(" Final block order: {:?}", self.final_block_order);
s = s + &"\n".to_string();
write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
write!(
&mut s,
" Final block order: {:?}\n",
self.final_block_order
)
.unwrap();
for i in 0..self.num_blocks() {
let block = display_order[i];
let omitted =
(if !self.final_block_order.is_empty() && i >= self.final_block_order.len() {
let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len()
{
"** OMITTED **"
} else {
""
})
.to_string();
};
s = s + &format!("Block {}: {}", block, omitted);
s = s + &"\n".to_string();
write!(&mut s, "Block {}: {}\n", block, omitted).unwrap();
if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
s = s + &format!(" (original IR block: {})\n", bb);
write!(&mut s, " (original IR block: {})\n", bb).unwrap();
}
for succ in self.succs(block as BlockIndex) {
s = s + &format!(" (successor: Block {})", succ);
s = s + &"\n".to_string();
write!(&mut s, " (successor: Block {})\n", succ).unwrap();
}
let (start, end) = self.block_ranges[block];
s = s + &format!(" (instruction range: {} .. {})", start, end);
s = s + &"\n".to_string();
write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
for inst in start..end {
s = s + &format!(
" Inst {}: {}",
write!(
&mut s,
" Inst {}: {}\n",
inst,
self.insts[inst as usize].show_rru(mb_rru)
);
s = s + &"\n".to_string();
)
.unwrap();
}
}
s = s + &format!("}}}}");
s = s + &"\n".to_string();
write!(&mut s, "}}}}\n").unwrap();
s
}

View File

@@ -1,15 +1,9 @@
//! A pass that computes the number of uses of any given instruction.
#![allow(dead_code)]
#![allow(unused_imports)]
use crate::cursor::{Cursor, FuncCursor};
use crate::dce::has_side_effect;
use crate::entity::SecondaryMap;
use crate::ir::dfg::ValueDef;
use crate::ir::instructions::InstructionData;
use crate::ir::Value;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};
use crate::ir::{DataFlowGraph, Function, Inst};
/// Auxiliary data structure that counts the number of uses of any given
/// instruction in a Function. This is used during instruction selection
@@ -51,16 +45,6 @@ impl NumUses {
}
}
/// How many times is an instruction used?
pub fn use_count(&self, i: Inst) -> usize {
self.uses[i] as usize
}
/// Is an instruction used at all?
pub fn is_used(&self, i: Inst) -> bool {
self.use_count(i) > 0
}
/// Take the complete uses map, consuming this analysis result.
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
self.uses

View File

@@ -364,11 +364,10 @@ pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
while let Some(_block) = pos.next_block() {
let mut last_flags_clobber = None;
while let Some(inst) = pos.next_inst() {
if isa.uses_cpu_flags() {
if !is_mach_backend && isa.uses_cpu_flags() {
// Optimize instructions to make use of flags.
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
if !is_mach_backend {
// Track the most recently seen instruction that clobbers the flags.
if let Some(constraints) = isa
.encoding_info()
@@ -379,7 +378,6 @@ pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
}
}
}
}
if isa.uses_complex_addresses() {
optimize_complex_addresses(&mut pos, inst, isa);

View File

@@ -28,17 +28,18 @@ pub fn verify_flags(
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let _tt = timing::verify_flags();
if isa.is_none() || isa.unwrap().get_mach_backend().is_none() {
let encinfo = if isa.is_none() || isa.unwrap().get_mach_backend().is_some() {
None
} else {
Some(isa.unwrap().encoding_info())
};
let mut verifier = FlagsVerifier {
func,
cfg,
encinfo: isa.map(|isa| isa.encoding_info()),
encinfo,
livein: SecondaryMap::new(),
};
verifier.check(errors)
} else {
Ok(())
}
}
struct FlagsVerifier<'a> {
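
The encinfo selection above (`None` when there is no ISA or when the ISA uses a new-style backend, `Some` otherwise) can also be written with `Option` combinators. A self-contained sketch with stand-in types, not the verifier's real ones:

```rust
struct EncInfo;

struct Isa {
    has_mach_backend: bool,
}

impl Isa {
    fn encoding_info(&self) -> EncInfo {
        EncInfo
    }
}

// None when there is no ISA or the ISA has a new-style (machinst) backend;
// Some(encoding info) only for old-style backends with encodings.
fn encinfo_for(isa: Option<&Isa>) -> Option<EncInfo> {
    isa.filter(|isa| !isa.has_mach_backend)
        .map(|isa| isa.encoding_info())
}

fn main() {
    assert!(encinfo_for(None).is_none());
    assert!(encinfo_for(Some(&Isa { has_mach_backend: true })).is_none());
    assert!(encinfo_for(Some(&Isa { has_mach_backend: false })).is_some());
}
```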

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i32, i32) -> i32 {
block0(v0: i32, v1: i32):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %a(i32) -> i32 {
block0(v0: i32):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i64, i64) -> i64 {
sig0 = (i64) -> i64

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i64) -> i64 {
fn0 = %g(i64) -> i64

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i64, i64) -> b1 {
block0(v0: i64, v1: i64):
@@ -33,7 +34,7 @@ block2:
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; nextln: b.eq 20
; check: Block 0:
; check: Block 2:
; check: movz x0, #2
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
@@ -58,7 +59,7 @@ block1:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; check: Block 0:
; check: Block 1:
; check: movz x0, #1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i8, i64, i64) -> i64 {
block0(v0: i8, v1: i64, v2: i64):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f() -> i64 {
block0:

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i8) -> i64 {
block0(v0: i8):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i64) -> i64 {
jt0 = jump_table [block1, block2, block3]

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %add8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %uaddsat64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f(i64) -> i64 {
block0(v0: i64):

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ROR, variable

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f() -> i64 {
gv0 = symbol %my_global

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f() {
block0:

View File

@@ -1,4 +1,5 @@
test vcode arch=arm64
test vcode
target aarch64
function %f_u_8_64(i8) -> i64 {
block0(v0: i8):

View File

@@ -4,11 +4,9 @@ use cranelift_codegen::isa::lookup;
use cranelift_codegen::settings;
use cranelift_codegen::Context as CodegenContext;
use cranelift_reader::{TestCommand, TestOption};
use target_lexicon::Triple;
use log::info;
use std::borrow::Cow;
use std::str::FromStr;
use std::string::String;
struct TestVCode {
@@ -41,15 +39,13 @@ impl SubTest for TestVCode {
}
fn needs_isa(&self) -> bool {
false
true
}
fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
let triple = context.isa.unwrap().triple().clone();
let func = func.into_owned();
let triple =
Triple::from_str(&self.arch).map_err(|_| format!("Unknown arch: '{}'", self.arch))?;
let mut isa = lookup(triple)
.map_err(|_| format!("Could not look up backend for arch '{}'", self.arch))?
.finish(settings::Flags::new(settings::builder()));

View File

@@ -142,12 +142,17 @@ cfg_if::cfg_if! {
pub fn ___chkstk();
}
const PROBESTACK: unsafe extern "C" fn() = ___chkstk;
} else if #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] {
// As per
// https://github.com/rust-lang/compiler-builtins/blob/cae3e6ea23739166504f9f9fb50ec070097979d4/src/probestack.rs#L39,
// LLVM only has stack-probe support on x86-64 and x86. Thus, on any other CPU
// architecture, we simply use an empty stack-probe function.
extern "C" fn empty_probestack() {}
const PROBESTACK: unsafe extern "C" fn() = empty_probestack;
} else {
extern "C" {
pub fn __rust_probestack();
}
static PROBESTACK: unsafe extern "C" fn() = empty_probestack;
static PROBESTACK: unsafe extern "C" fn() = __rust_probestack;
}
}
extern "C" fn empty_probestack() {}

View File

@@ -31,7 +31,6 @@ cfg_if::cfg_if! {
static mut PREV_SIGBUS: MaybeUninit<libc::sigaction> = MaybeUninit::uninit();
static mut PREV_SIGILL: MaybeUninit<libc::sigaction> = MaybeUninit::uninit();
static mut PREV_SIGFPE: MaybeUninit<libc::sigaction> = MaybeUninit::uninit();
static mut PREV_SIGTRAP: MaybeUninit<libc::sigaction> = MaybeUninit::uninit();
unsafe fn platform_init() {
let register = |slot: &mut MaybeUninit<libc::sigaction>, signal: i32| {
@@ -71,9 +70,6 @@ cfg_if::cfg_if! {
register(&mut PREV_SIGFPE, libc::SIGFPE);
}
// on ARM64, we use `brk` to report traps, which generates SIGTRAP.
register(&mut PREV_SIGTRAP, libc::SIGTRAP);
// On ARM, handle Unaligned Accesses.
// On Darwin, guard page accesses are raised as SIGBUS.
if cfg!(target_arch = "arm") || cfg!(target_os = "macos") {
@@ -91,7 +87,6 @@ cfg_if::cfg_if! {
libc::SIGBUS => &PREV_SIGBUS,
libc::SIGFPE => &PREV_SIGFPE,
libc::SIGILL => &PREV_SIGILL,
libc::SIGTRAP => &PREV_SIGTRAP,
_ => panic!("unknown signal: {}", signum),
};
let handled = tls::with(|info| {

View File

@@ -122,7 +122,7 @@ mod tests {
.downcast::<Trap>()?;
assert!(
trap.message()
.starts_with("wasm trap: out of bounds"),
.starts_with("wasm trap: out of bounds memory access"),
"bad trap message: {:?}",
trap.message()
);
@@ -149,7 +149,7 @@ mod tests {
.downcast::<Trap>()?;
assert!(trap
.message()
.starts_with("wasm trap: out of bounds"));
.starts_with("wasm trap: out of bounds memory access"));
}
Ok(())
}