diff --git a/cranelift/codegen/src/isa/arm64/inst/args.rs b/cranelift/codegen/src/isa/arm64/inst/args.rs new file mode 100644 index 0000000000..75cf12283b --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/args.rs @@ -0,0 +1,501 @@ +//! ARM64 ISA definitions: instruction arguments. + +#![allow(dead_code)] +#![allow(non_snake_case)] + +use crate::binemit::{CodeOffset, CodeSink}; +use crate::ir::constant::{ConstantData, ConstantOffset}; +use crate::ir::Type; +use crate::isa::arm64::inst::*; +use crate::machinst::*; + +use regalloc::{ + RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, + NUM_REG_CLASSES, +}; + +use std::string::{String, ToString}; + +/// A shift operator for a register or immediate. +#[derive(Clone, Copy, Debug)] +pub enum ShiftOp { + ASR, + LSR, + LSL, + ROR, +} + +impl ShiftOp { + /// Get the encoding of this shift op. + pub fn bits(&self) -> u8 { + match self { + &ShiftOp::LSL => 0b00, + &ShiftOp::LSR => 0b01, + &ShiftOp::ASR => 0b10, + &ShiftOp::ROR => 0b11, + } + } +} + +/// A shift operator with an amount, guaranteed to be within range. +#[derive(Clone, Debug)] +pub struct ShiftOpAndAmt { + op: ShiftOp, + shift: ShiftOpShiftImm, +} + +/// A shift operator amount. +#[derive(Clone, Copy, Debug)] +pub struct ShiftOpShiftImm(u8); + +impl ShiftOpShiftImm { + /// Maximum shift for shifted-register operands. + pub const MAX_SHIFT: u64 = 63; + + /// Create a new shiftop shift amount, if possible. + pub fn maybe_from_shift(shift: u64) -> Option { + if shift <= Self::MAX_SHIFT { + Some(ShiftOpShiftImm(shift as u8)) + } else { + None + } + } + + /// Return the shift amount. + pub fn value(&self) -> u8 { + self.0 + } +} + +impl ShiftOpAndAmt { + pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt { + ShiftOpAndAmt { op, shift } + } + + /// Get the shift op. + pub fn op(&self) -> ShiftOp { + self.op.clone() + } + + /// Get the shift amount. + pub fn amt(&self) -> ShiftOpShiftImm { + self.shift + } +} + +/// An extend operator for a register. +#[derive(Clone, Copy, Debug)] +pub enum ExtendOp { + SXTB, + SXTH, + SXTW, + SXTX, + UXTB, + UXTH, + UXTW, + UXTX, +} + +impl ExtendOp { + /// Encoding of this op. + pub fn bits(&self) -> u8 { + match self { + &ExtendOp::UXTB => 0b000, + &ExtendOp::UXTH => 0b001, + &ExtendOp::UXTW => 0b010, + &ExtendOp::UXTX => 0b011, + &ExtendOp::SXTB => 0b100, + &ExtendOp::SXTH => 0b101, + &ExtendOp::SXTW => 0b110, + &ExtendOp::SXTX => 0b111, + } + } +} + +//============================================================================= +// Instruction sub-components (memory addresses): definitions + +/// A reference to some memory address. +#[derive(Clone, Debug)] +pub enum MemLabel { + /// An address in the code, a constant pool or jumptable, with relative + /// offset from this instruction. This form must be used at emission time; + /// see `memlabel_finalize()` for how other forms are lowered to this one. + PCRel(i32), +} + +/// A memory argument to load/store, encapsulating the possible addressing modes. +#[derive(Clone, Debug)] +pub enum MemArg { + Label(MemLabel), + PostIndexed(Writable, SImm9), + PreIndexed(Writable, SImm9), + // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to + // what the ISA calls the "register offset" addressing mode. We split out + // several options here for more ergonomic codegen. 
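+    // As a rough illustration (register and type choices below are arbitrary),
+    // the address each register-offset variant computes is:
+    //   RegReg(x0, x1)                       -> x0 + x1
+    //   RegScaled(x0, x1, I64)               -> x0 + (x1 << 3)
+    //   RegScaledExtended(x0, x1, I64, UXTW) -> x0 + (zero_extend_32(x1) << 3)
+    // i.e., `RegScaled` shifts the index by log2(size_of(ty)), and the extended
+    // form first sign- or zero-extends a 32-bit index.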
+ RegReg(Reg, Reg), + RegScaled(Reg, Reg, Type), + RegScaledExtended(Reg, Reg, Type, ExtendOp), + Unscaled(Reg, SImm9), + UnsignedOffset(Reg, UImm12Scaled), + /// Offset from the stack pointer or frame pointer. + SPOffset(i64), + FPOffset(i64), +} + +impl MemArg { + /// Memory reference using an address in a register. + pub fn reg(reg: Reg) -> MemArg { + // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur. + // This also does not use PostIndexed / PreIndexed as they update the register. + MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64)) + } + + /// Memory reference using an address in a register and an offset, if possible. + pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option { + if offset == 0 { + Some(MemArg::Unscaled(reg, SImm9::zero())) + } else if let Some(simm9) = SImm9::maybe_from_i64(offset) { + Some(MemArg::Unscaled(reg, simm9)) + } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) { + Some(MemArg::UnsignedOffset(reg, uimm12s)) + } else { + None + } + } + + /// Memory reference using the sum of two registers as an address. + pub fn reg_reg(reg1: Reg, reg2: Reg) -> MemArg { + MemArg::RegReg(reg1, reg2) + } + + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. + pub fn reg_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg { + MemArg::RegScaled(reg1, reg2, ty) + } + + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. + pub fn reg_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg { + MemArg::RegScaledExtended(reg1, reg2, ty, op) + } + + /// Memory reference to a label: a global function or value, or data in the constant pool. + pub fn label(label: MemLabel) -> MemArg { + MemArg::Label(label) + } +} + +/// A memory argument to a load/store-pair. +#[derive(Clone, Debug)] +pub enum PairMemArg { + SignedOffset(Reg, SImm7Scaled), + PreIndexed(Writable, SImm7Scaled), + PostIndexed(Writable, SImm7Scaled), +} + +//============================================================================= +// Instruction sub-components (conditions, branches and branch targets): +// definitions + +/// Condition for conditional branches. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Cond { + Eq, + Ne, + Hs, + Lo, + Mi, + Pl, + Vs, + Vc, + Hi, + Ls, + Ge, + Lt, + Gt, + Le, + Al, + Nv, +} + +impl Cond { + /// Return the inverted condition. + pub fn invert(self) -> Cond { + match self { + Cond::Eq => Cond::Ne, + Cond::Ne => Cond::Eq, + Cond::Hs => Cond::Lo, + Cond::Lo => Cond::Hs, + Cond::Mi => Cond::Pl, + Cond::Pl => Cond::Mi, + Cond::Vs => Cond::Vc, + Cond::Vc => Cond::Vs, + Cond::Hi => Cond::Ls, + Cond::Ls => Cond::Hi, + Cond::Ge => Cond::Lt, + Cond::Lt => Cond::Ge, + Cond::Gt => Cond::Le, + Cond::Le => Cond::Gt, + Cond::Al => Cond::Nv, + Cond::Nv => Cond::Al, + } + } + + /// Return the machine encoding of this condition. + pub fn bits(self) -> u32 { + match self { + Cond::Eq => 0, + Cond::Ne => 1, + Cond::Hs => 2, + Cond::Lo => 3, + Cond::Mi => 4, + Cond::Pl => 5, + Cond::Vs => 6, + Cond::Vc => 7, + Cond::Hi => 8, + Cond::Ls => 9, + Cond::Ge => 10, + Cond::Lt => 11, + Cond::Gt => 12, + Cond::Le => 13, + Cond::Al => 14, + Cond::Nv => 15, + } + } +} + +/// The kind of conditional branch: the common-case-optimized "reg-is-zero" / +/// "reg-is-nonzero" variants, or the generic one that tests the machine +/// condition codes. +#[derive(Clone, Copy, Debug)] +pub enum CondBrKind { + /// Condition: given register is zero. 
+ Zero(Reg), + /// Condition: given register is nonzero. + NotZero(Reg), + /// Condition: the given condition-code test is true. + Cond(Cond), +} + +impl CondBrKind { + /// Return the inverted branch condition. + pub fn invert(self) -> CondBrKind { + match self { + CondBrKind::Zero(reg) => CondBrKind::NotZero(reg), + CondBrKind::NotZero(reg) => CondBrKind::Zero(reg), + CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()), + } + } +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug)] +pub enum BranchTarget { + /// An unresolved reference to a BlockIndex, as passed into + /// `lower_branch_group()`. + Block(BlockIndex), + /// A resolved reference to another instruction, after + /// `Inst::with_block_offsets()`. + ResolvedOffset(isize), +} + +impl BranchTarget { + /// Lower the branch target given offsets of each block. + pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) { + match self { + &mut BranchTarget::Block(bix) => { + let bix = bix as usize; + assert!(bix < targets.len()); + let block_offset_in_func = targets[bix]; + let branch_offset = (block_offset_in_func as isize) - (my_offset as isize); + *self = BranchTarget::ResolvedOffset(branch_offset); + } + &mut BranchTarget::ResolvedOffset(..) => {} + } + } + + /// Get the block index. + pub fn as_block_index(&self) -> Option { + match self { + &BranchTarget::Block(bix) => Some(bix), + _ => None, + } + } + + /// Get the offset as 4-byte words. Returns `0` if not + /// yet resolved (in that case, we're only computing + /// size and the offset doesn't matter). + pub fn as_offset_words(&self) -> isize { + match self { + &BranchTarget::ResolvedOffset(off) => off >> 2, + _ => 0, + } + } + + /// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow. + pub fn as_off26(&self) -> Option { + let off = self.as_offset_words(); + if (off < (1 << 25)) && (off >= -(1 << 25)) { + Some((off as u32) & ((1 << 26) - 1)) + } else { + None + } + } + + /// Get the offset as a 16-bit offset, or `None` if overflow. + pub fn as_off19(&self) -> Option { + let off = self.as_offset_words(); + if (off < (1 << 18)) && (off >= -(1 << 18)) { + Some((off as u32) & ((1 << 19) - 1)) + } else { + None + } + } + + /// Map the block index given a transform map. 
+ pub fn map(&mut self, block_index_map: &[BlockIndex]) { + match self { + &mut BranchTarget::Block(ref mut bix) => { + let n = block_index_map[*bix as usize]; + *bix = n; + } + &mut BranchTarget::ResolvedOffset(_) => {} + } + } +} + +impl ShowWithRRU for ShiftOpAndAmt { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{:?} {}", self.op(), self.amt().value()) + } +} + +impl ShowWithRRU for ExtendOp { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{:?}", self) + } +} + +impl ShowWithRRU for MemLabel { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &MemLabel::PCRel(off) => format!("pc+{}", off), + } + } +} + +fn shift_for_type(ty: Type) -> usize { + match ty.bytes() { + 1 => 0, + 2 => 1, + 4 => 2, + 8 => 3, + 16 => 4, + _ => panic!("unknown type"), + } +} + +impl ShowWithRRU for MemArg { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &MemArg::Unscaled(reg, simm9) => { + if simm9.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &MemArg::UnsignedOffset(reg, uimm12) => { + if uimm12.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &MemArg::RegReg(r1, r2) => { + format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),) + } + &MemArg::RegScaled(r1, r2, ty) => { + let shift = shift_for_type(ty); + format!( + "[{}, {}, LSL #{}]", + r1.show_rru(mb_rru), + r2.show_rru(mb_rru), + shift, + ) + } + &MemArg::RegScaledExtended(r1, r2, ty, op) => { + let shift = shift_for_type(ty); + let is32 = match op { + ExtendOp::SXTW | ExtendOp::UXTW => true, + _ => false, + }; + let op = op.show_rru(mb_rru); + format!( + "[{}, {}, {} #{}]", + r1.show_rru(mb_rru), + show_ireg_sized(r2, mb_rru, is32), + op, + shift + ) + } + &MemArg::Label(ref label) => label.show_rru(mb_rru), + &MemArg::PreIndexed(r, simm9) => format!( + "[{}, {}]!", + r.to_reg().show_rru(mb_rru), + simm9.show_rru(mb_rru) + ), + &MemArg::PostIndexed(r, simm9) => format!( + "[{}], {}", + r.to_reg().show_rru(mb_rru), + simm9.show_rru(mb_rru) + ), + // Eliminated by `mem_finalize()`. + &MemArg::SPOffset(..) | &MemArg::FPOffset(..) 
=> { + panic!("Unexpected stack-offset mem-arg mode!") + } + } + } +} + +impl ShowWithRRU for PairMemArg { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &PairMemArg::SignedOffset(reg, simm7) => { + if simm7.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &PairMemArg::PreIndexed(reg, simm7) => format!( + "[{}, {}]!", + reg.to_reg().show_rru(mb_rru), + simm7.show_rru(mb_rru) + ), + &PairMemArg::PostIndexed(reg, simm7) => format!( + "[{}], {}", + reg.to_reg().show_rru(mb_rru), + simm7.show_rru(mb_rru) + ), + } + } +} + +impl ShowWithRRU for Cond { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let mut s = format!("{:?}", self); + s.make_ascii_lowercase(); + s + } +} + +impl ShowWithRRU for BranchTarget { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &BranchTarget::Block(block) => format!("block{}", block), + &BranchTarget::ResolvedOffset(off) => format!("{}", off), + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/emit.rs b/cranelift/codegen/src/isa/arm64/inst/emit.rs new file mode 100644 index 0000000000..20eefdeaae --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/emit.rs @@ -0,0 +1,4106 @@ +//! ARM64 ISA: binary code emission. + +#![allow(dead_code)] +#![allow(non_snake_case)] + +use crate::binemit::{CodeOffset, CodeSink, Reloc}; +use crate::ir::constant::ConstantData; +use crate::ir::types::*; +use crate::ir::{Opcode, TrapCode, Type}; +use crate::isa::arm64::inst::*; +use crate::machinst::*; +use cranelift_entity::EntityRef; + +use std::env; + +use regalloc::{ + RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, + NUM_REG_CLASSES, +}; + +use alloc::vec::Vec; + +/// Memory label/reference finalization: convert a MemLabel to a PC-relative +/// offset, possibly emitting relocation(s) as necessary. +pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 { + match label { + &MemLabel::PCRel(rel) => rel, + } +} + +/// Memory addressing mode finalization: convert "special" modes (e.g., +/// generic arbitrary stack offset) into real addressing modes, possibly by +/// emitting some helper instructions that come immediately before the use +/// of this amode. +pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec, MemArg) { + match mem { + &MemArg::SPOffset(off) | &MemArg::FPOffset(off) => { + let basereg = match mem { + &MemArg::SPOffset(..) => stack_reg(), + &MemArg::FPOffset(..) => fp_reg(), + _ => unreachable!(), + }; + if let Some(simm9) = SImm9::maybe_from_i64(off) { + let mem = MemArg::Unscaled(basereg, simm9); + (vec![], mem) + } else { + let tmp = writable_spilltmp_reg(); + let mut const_insts = Inst::load_constant(tmp, off as u64); + let add_inst = Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: tmp, + rn: tmp.to_reg(), + rm: basereg, + }; + const_insts.push(add_inst); + (const_insts.to_vec(), MemArg::reg(tmp.to_reg())) + } + } + &MemArg::Label(ref label) => { + let off = memlabel_finalize(insn_off, label); + (vec![], MemArg::Label(MemLabel::PCRel(off))) + } + _ => (vec![], mem.clone()), + } +} + +/// Helper: get a ConstantData from a u64. 
+pub fn u64_constant(bits: u64) -> ConstantData { + let data = [ + (bits & 0xff) as u8, + ((bits >> 8) & 0xff) as u8, + ((bits >> 16) & 0xff) as u8, + ((bits >> 24) & 0xff) as u8, + ((bits >> 32) & 0xff) as u8, + ((bits >> 40) & 0xff) as u8, + ((bits >> 48) & 0xff) as u8, + ((bits >> 56) & 0xff) as u8, + ]; + ConstantData::from(&data[..]) +} + +//============================================================================= +// Instructions and subcomponents: emission + +fn machreg_to_gpr(m: Reg) -> u32 { + assert!(m.get_class() == RegClass::I64); + assert!(m.is_real()); + m.to_real_reg().get_hw_encoding() as u32 +} + +fn machreg_to_vec(m: Reg) -> u32 { + assert!(m.get_class() == RegClass::V128); + assert!(m.is_real()); + m.to_real_reg().get_hw_encoding() as u32 +} + +fn machreg_to_gpr_or_vec(m: Reg) -> u32 { + m.to_real_reg().get_hw_encoding() as u32 +} + +fn enc_arith_rrr(bits_31_21: u16, bits_15_10: u8, rd: Writable, rn: Reg, rm: Reg) -> u32 { + ((bits_31_21 as u32) << 21) + | ((bits_15_10 as u32) << 10) + | machreg_to_gpr(rd.to_reg()) + | (machreg_to_gpr(rn) << 5) + | (machreg_to_gpr(rm) << 16) +} + +fn enc_arith_rr_imm12(bits_31_24: u8, immshift: u8, imm12: u16, rn: Reg, rd: Writable) -> u32 { + ((bits_31_24 as u32) << 24) + | ((immshift as u32) << 22) + | ((imm12 as u32) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rr_imml(bits_31_23: u16, imm_bits: u16, rn: Reg, rd: Writable) -> u32 { + ((bits_31_23 as u32) << 23) + | ((imm_bits as u32) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable) -> u32 { + (top11 << 21) + | (machreg_to_gpr(rm) << 16) + | (bit15 << 15) + | (machreg_to_gpr(ra) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 { + assert!(off_26_0 < (1 << 26)); + (op_31_26 << 26) | off_26_0 +} + +fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 { + assert!(off_18_0 < (1 << 19)); + (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg) +} + +fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 { + assert!(off_18_0 < (1 << 19)); + assert!(cond < (1 << 4)); + (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond +} + +const MOVE_WIDE_FIXED: u32 = 0x92800000; + +#[repr(u32)] +enum MoveWideOpcode { + MOVN = 0b00, + MOVZ = 0b10, + MOVK = 0b11, +} + +fn enc_move_wide(op: MoveWideOpcode, rd: Writable, imm: MoveWideConst) -> u32 { + assert!(imm.shift <= 0b11); + MOVE_WIDE_FIXED + | (op as u32) << 29 + | (imm.shift as u32) << 21 + | (imm.bits as u32) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 { + (op_31_22 << 22) + | (simm7.bits() << 15) + | (machreg_to_gpr(rt2) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt) +} + +fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (simm9.bits() << 12) + | (op_11_10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (0b1 << 24) + | (uimm12.bits() << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_reg( + op_31_22: u32, + rn: Reg, + rm: Reg, + s_bit: bool, + extendop: Option, + rd: Reg, +) -> u32 { + let s_bit = if s_bit { 1 } else { 0 }; + let extend_bits = match extendop { + 
Some(ExtendOp::UXTW) => 0b010, + Some(ExtendOp::SXTW) => 0b110, + Some(ExtendOp::SXTX) => 0b111, + None => 0b011, /* LSL */ + _ => panic!("bad extend mode for ld/st MemArg"), + }; + (op_31_22 << 22) + | (1 << 21) + | (machreg_to_gpr(rm) << 16) + | (extend_bits << 13) + | (s_bit << 12) + | (0b10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 { + (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd) +} + +fn enc_extend(top22: u32, rd: Writable, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable) -> u32 { + (top11 << 21) + | (machreg_to_vec(rm) << 16) + | (bit15_10 << 10) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable) -> u32 { + (0b01011010110 << 21) + | size << 31 + | opcode2 << 16 + | opcode1 << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_br(rn: Reg) -> u32 { + 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5) +} + +fn enc_adr(off: i32, rd: Writable) -> u32 { + let off = off as u32; + let immlo = off & 3; + let immhi = (off >> 2) & ((1 << 19) - 1); + (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_csel(rd: Writable, rn: Reg, rm: Reg, cond: Cond) -> u32 { + 0b100_11010100_00000_0000_00_00000_00000 + | (machreg_to_gpr(rm) << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) + | (cond.bits() << 12) +} + +fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, is32: bool) -> u32 { + let ty_bit = if is32 { 0 } else { 1 }; + 0b000_11110_00_1_00000_0000_11_00000_00000 + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) + | (cond.bits() << 12) + | (ty_bit << 22) +} + +fn enc_cset(rd: Writable, cond: Cond) -> u32 { + 0b100_11010100_11111_0000_01_11111_00000 + | machreg_to_gpr(rd.to_reg()) + | (cond.invert().bits() << 12) +} + +fn enc_vecmov(is_16b: bool, rd: Writable, rn: Reg) -> u32 { + debug_assert!(!is_16b); // to be supported later. 
+ 0b00001110_101_00000_00011_1_00000_00000 + | machreg_to_vec(rd.to_reg()) + | (machreg_to_vec(rn) << 16) + | (machreg_to_vec(rn) << 5) +} + +fn enc_fpurr(top22: u32, rd: Writable, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +fn enc_fpurrr(top22: u32, rd: Writable, rn: Reg, rm: Reg) -> u32 { + (top22 << 10) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_fpurrrr(top17: u32, rd: Writable, rn: Reg, rm: Reg, ra: Reg) -> u32 { + (top17 << 15) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(ra) << 10) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_fcmp(is32: bool, rn: Reg, rm: Reg) -> u32 { + let bits = if is32 { + 0b000_11110_00_1_00000_00_1000_00000_00000 + } else { + 0b000_11110_01_1_00000_00_1000_00000_00000 + }; + bits | (machreg_to_vec(rm) << 16) | (machreg_to_vec(rn) << 5) +} + +fn enc_fputoint(top16: u32, rd: Writable, rn: Reg) -> u32 { + (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_inttofpu(top16: u32, rd: Writable, rn: Reg) -> u32 { + (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +fn enc_fround(top22: u32, rd: Writable, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +impl MachInstEmit for Inst { + fn emit(&self, sink: &mut O) { + match self { + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let top11 = match alu_op { + ALUOp::Add32 => 0b00001011_000, + ALUOp::Add64 => 0b10001011_000, + ALUOp::Sub32 => 0b01001011_000, + ALUOp::Sub64 => 0b11001011_000, + ALUOp::Orr32 => 0b00101010_000, + ALUOp::Orr64 => 0b10101010_000, + ALUOp::And32 => 0b00001010_000, + ALUOp::And64 => 0b10001010_000, + ALUOp::Eor32 => 0b01001010_000, + ALUOp::Eor64 => 0b11001010_000, + ALUOp::OrrNot32 => 0b00101010_001, + ALUOp::OrrNot64 => 0b10101010_001, + ALUOp::AndNot32 => 0b00001010_001, + ALUOp::AndNot64 => 0b10001010_001, + ALUOp::EorNot32 => 0b01001010_001, + ALUOp::EorNot64 => 0b11001010_001, + ALUOp::AddS32 => 0b00101011_000, + ALUOp::AddS64 => 0b10101011_000, + ALUOp::SubS32 => 0b01101011_000, + ALUOp::SubS64 => 0b11101011_000, + ALUOp::SDiv64 => 0b10011010_110, + ALUOp::UDiv64 => 0b10011010_110, + ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110, + ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110, + + ALUOp::MAdd32 + | ALUOp::MAdd64 + | ALUOp::MSub32 + | ALUOp::MSub64 + | ALUOp::SMulH + | ALUOp::UMulH => { + //// RRRR ops. 
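+ // These four-operand forms are emitted via the `Inst::AluRRRR` arm below.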
+ panic!("Bad ALUOp in RRR form!"); + } + }; + let bit15_10 = match alu_op { + ALUOp::SDiv64 => 0b000011, + ALUOp::UDiv64 => 0b000010, + ALUOp::RotR32 | ALUOp::RotR64 => 0b001011, + ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001, + ALUOp::Asr32 | ALUOp::Asr64 => 0b001010, + ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000, + _ => 0b000000, + }; + assert_ne!(writable_stack_reg(), rd); + sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm)); + } + &Inst::AluRRRR { + alu_op, + rd, + rm, + rn, + ra, + } => { + let (top11, bit15) = match alu_op { + ALUOp::MAdd32 => (0b0_00_11011_000, 0), + ALUOp::MSub32 => (0b0_00_11011_000, 1), + ALUOp::MAdd64 => (0b1_00_11011_000, 0), + ALUOp::MSub64 => (0b1_00_11011_000, 1), + ALUOp::SMulH => (0b1_00_11011_010, 0), + ALUOp::UMulH => (0b1_00_11011_110, 0), + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd)); + } + &Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => { + let top8 = match alu_op { + ALUOp::Add32 => 0b000_10001, + ALUOp::Add64 => 0b100_10001, + ALUOp::Sub32 => 0b010_10001, + ALUOp::Sub64 => 0b110_10001, + ALUOp::AddS32 => 0b001_10001, + ALUOp::AddS64 => 0b101_10001, + ALUOp::SubS32 => 0b011_10001, + ALUOp::SubS64 => 0b111_10001, + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4(enc_arith_rr_imm12( + top8, + imm12.shift_bits(), + imm12.imm_bits(), + rn, + rd, + )); + } + &Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => { + let (top9, inv) = match alu_op { + ALUOp::Orr32 => (0b001_100100, false), + ALUOp::Orr64 => (0b101_100100, false), + ALUOp::And32 => (0b000_100100, false), + ALUOp::And64 => (0b100_100100, false), + ALUOp::Eor32 => (0b010_100100, false), + ALUOp::Eor64 => (0b110_100100, false), + ALUOp::OrrNot32 => (0b001_100100, true), + ALUOp::OrrNot64 => (0b101_100100, true), + ALUOp::AndNot32 => (0b000_100100, true), + ALUOp::AndNot64 => (0b100_100100, true), + ALUOp::EorNot32 => (0b010_100100, true), + ALUOp::EorNot64 => (0b110_100100, true), + _ => unimplemented!("{:?}", alu_op), + }; + let imml = if inv { imml.invert() } else { imml.clone() }; + sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd)); + } + + &Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => { + let amt = immshift.value(); + let (top10, immr, imms) = match alu_op { + ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), amt as u32), + ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), amt as u32), + ALUOp::Lsr32 => (0b0101001100, amt as u32, 0b011111), + ALUOp::Lsr64 => (0b1101001101, amt as u32, 0b111111), + ALUOp::Asr32 => (0b0001001100, amt as u32, 0b011111), + ALUOp::Asr64 => (0b1001001101, amt as u32, 0b111111), + ALUOp::Lsl32 => (0b0101001100, (32 - amt) as u32, (31 - amt) as u32), + ALUOp::Lsl64 => (0b1101001101, (64 - amt) as u32, (63 - amt) as u32), + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4( + (top10 << 22) + | (immr << 16) + | (imms << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + + &Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => { + let top11: u16 = match alu_op { + ALUOp::Add32 => 0b000_01011000, + ALUOp::Add64 => 0b100_01011000, + ALUOp::AddS32 => 0b001_01011000, + ALUOp::AddS64 => 0b101_01011000, + ALUOp::Sub32 => 0b010_01011000, + ALUOp::Sub64 => 0b110_01011000, + ALUOp::SubS32 => 0b011_01011000, + ALUOp::SubS64 => 0b111_01011000, + ALUOp::Orr32 => 0b001_01010000, + ALUOp::Orr64 => 0b101_01010000, + ALUOp::And32 => 0b000_01010000, + ALUOp::And64 => 0b100_01010000, + ALUOp::Eor32 => 0b010_01010000, + 
ALUOp::Eor64 => 0b110_01010000, + ALUOp::OrrNot32 => 0b001_01010001, + ALUOp::OrrNot64 => 0b101_01010001, + ALUOp::EorNot32 => 0b010_01010001, + ALUOp::EorNot64 => 0b110_01010001, + ALUOp::AndNot32 => 0b000_01010001, + ALUOp::AndNot64 => 0b100_01010001, + _ => unimplemented!("{:?}", alu_op), + }; + let top11 = top11 | ((shiftop.op().bits() as u16) << 1); + let bits_15_10 = shiftop.amt().value(); + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + extendop, + } => { + let top11 = match alu_op { + ALUOp::Add32 => 0b00001011001, + ALUOp::Add64 => 0b10001011001, + ALUOp::Sub32 => 0b01001011001, + ALUOp::Sub64 => 0b11001011001, + ALUOp::AddS32 => 0b00101011001, + ALUOp::AddS64 => 0b10101011001, + ALUOp::SubS32 => 0b01101011001, + ALUOp::SubS64 => 0b11101011001, + _ => unimplemented!("{:?}", alu_op), + }; + let bits_15_10 = extendop.bits() << 3; + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::BitRR { op, rd, rn, .. } => { + let size = if op.is_32_bit() { 0b0 } else { 0b1 }; + let (op1, op2) = match op { + BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000), + BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100), + BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101), + }; + sink.put4(enc_bit_rr(size, op1, op2, rn, rd)) + } + + &Inst::ULoad8 { + rd, + ref mem, + srcloc, + } + | &Inst::SLoad8 { + rd, + ref mem, + srcloc, + } + | &Inst::ULoad16 { + rd, + ref mem, + srcloc, + } + | &Inst::SLoad16 { + rd, + ref mem, + srcloc, + } + | &Inst::ULoad32 { + rd, + ref mem, + srcloc, + } + | &Inst::SLoad32 { + rd, + ref mem, + srcloc, + } + | &Inst::ULoad64 { + rd, + ref mem, + srcloc, + .. + } + | &Inst::FpuLoad32 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuLoad64 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuLoad128 { + rd, + ref mem, + srcloc, + } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); + + for inst in mem_insts.into_iter() { + inst.emit(sink); + } + + // ldst encoding helpers take Reg, not Writable. + let rd = rd.to_reg(); + + // This is the base opcode (top 10 bits) for the "unscaled + // immediate" form (Unscaled). Other addressing modes will OR in + // other values for bits 24/25 (bits 1/2 of this constant). + let op = match self { + &Inst::ULoad8 { .. } => 0b0011100001, + &Inst::SLoad8 { .. } => 0b0011100010, + &Inst::ULoad16 { .. } => 0b0111100001, + &Inst::SLoad16 { .. } => 0b0111100010, + &Inst::ULoad32 { .. } => 0b1011100001, + &Inst::SLoad32 { .. } => 0b1011100010, + &Inst::ULoad64 { .. } => 0b1111100001, + &Inst::FpuLoad32 { .. } => 0b1011110001, + &Inst::FpuLoad64 { .. } => 0b1111110001, + &Inst::FpuLoad128 { .. } => 0b0011110011, + _ => unreachable!(), + }; + + if let Some(srcloc) = srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::OutOfBounds); + } + + match &mem { + &MemArg::Unscaled(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &MemArg::UnsignedOffset(reg, uimm12scaled) => { + sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); + } + &MemArg::RegReg(r1, r2) => { + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => { + match (ty, self) { + (I8, &Inst::ULoad8 { .. }) => {} + (I8, &Inst::SLoad8 { .. }) => {} + (I16, &Inst::ULoad16 { .. }) => {} + (I16, &Inst::SLoad16 { .. }) => {} + (I32, &Inst::ULoad32 { .. 
}) => {} + (I32, &Inst::SLoad32 { .. }) => {} + (I64, &Inst::ULoad64 { .. }) => {} + (F32, &Inst::FpuLoad32 { .. }) => {} + (F64, &Inst::FpuLoad64 { .. }) => {} + (I128, &Inst::FpuLoad128 { .. }) => {} + _ => panic!("Mismatching reg-scaling type in MemArg"), + } + let extendop = match &mem { + &MemArg::RegScaled(..) => None, + &MemArg::RegScaledExtended(_, _, _, op) => Some(op), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &MemArg::Label(ref label) => { + let offset = match label { + &MemLabel::PCRel(off) => off as u32, + } / 4; + assert!(offset < (1 << 19)); + match self { + &Inst::ULoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011000, offset, rd)); + } + &Inst::SLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b10011000, offset, rd)); + } + &Inst::FpuLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011100, offset, rd)); + } + &Inst::ULoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011000, offset, rd)); + } + &Inst::FpuLoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011100, offset, rd)); + } + &Inst::FpuLoad128 { .. } => { + sink.put4(enc_ldst_imm19(0b10011100, offset, rd)); + } + _ => panic!("Unspported size for LDR from constant pool!"), + } + } + &MemArg::PreIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + } + &MemArg::PostIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + } + // Eliminated by `mem_finalize()` above. + &MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { + panic!("Should not see stack-offset here!") + } + } + } + + &Inst::Store8 { + rd, + ref mem, + srcloc, + } + | &Inst::Store16 { + rd, + ref mem, + srcloc, + } + | &Inst::Store32 { + rd, + ref mem, + srcloc, + } + | &Inst::Store64 { + rd, + ref mem, + srcloc, + .. + } + | &Inst::FpuStore32 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuStore64 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuStore128 { + rd, + ref mem, + srcloc, + } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); + + for inst in mem_insts.into_iter() { + inst.emit(sink); + } + + let op = match self { + &Inst::Store8 { .. } => 0b0011100000, + &Inst::Store16 { .. } => 0b0111100000, + &Inst::Store32 { .. } => 0b1011100000, + &Inst::Store64 { .. } => 0b1111100000, + &Inst::FpuStore32 { .. } => 0b1011110000, + &Inst::FpuStore64 { .. } => 0b1111110000, + &Inst::FpuStore128 { .. } => 0b0011110010, + _ => unreachable!(), + }; + + if let Some(srcloc) = srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::OutOfBounds); + } + + match &mem { + &MemArg::Unscaled(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &MemArg::UnsignedOffset(reg, uimm12scaled) => { + sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); + } + &MemArg::RegReg(r1, r2) => { + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &MemArg::RegScaled(r1, r2, _ty) + | &MemArg::RegScaledExtended(r1, r2, _ty, _) => { + let extendop = match &mem { + &MemArg::RegScaled(..) => None, + &MemArg::RegScaledExtended(_, _, _, op) => Some(op), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &MemArg::Label(..) 
=> { + panic!("Store to a MemLabel not implemented!"); + } + &MemArg::PreIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + } + &MemArg::PostIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + } + // Eliminated by `mem_finalize()` above. + &MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { + panic!("Should not see stack-offset here!") + } + } + } + + &Inst::StoreP64 { rt, rt2, ref mem } => match mem { + &PairMemArg::SignedOffset(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); + } + &PairMemArg::PreIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2)); + } + &PairMemArg::PostIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2)); + } + }, + &Inst::LoadP64 { rt, rt2, ref mem } => { + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + match mem { + &PairMemArg::SignedOffset(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); + } + &PairMemArg::PreIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2)); + } + &PairMemArg::PostIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2)); + } + } + } + &Inst::Mov { rd, rm } => { + assert!(rd.to_reg().get_class() == rm.get_class()); + assert!(rm.get_class() == RegClass::I64); + // Encoded as ORR rd, rm, zero. + sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm)); + } + &Inst::Mov32 { rd, rm } => { + // Encoded as ORR rd, rm, zero. 
+ sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); + } + &Inst::MovZ { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm)), + &Inst::MovN { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm)), + &Inst::MovK { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm)), + &Inst::CSel { rd, rn, rm, cond } => { + sink.put4(enc_csel(rd, rn, rm, cond)); + } + &Inst::CSet { rd, cond } => { + sink.put4(enc_cset(rd, cond)); + } + &Inst::FpuMove64 { rd, rn } => { + sink.put4(enc_vecmov(/* 16b = */ false, rd, rn)); + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let top22 = match fpu_op { + FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000, + FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000, + FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000, + FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000, + FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000, + FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000, + FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000, + FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000, + }; + sink.put4(enc_fpurr(top22, rd, rn)); + } + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let top22 = match fpu_op { + FPUOp2::Add32 => 0b000_11110_00_1_00000_001010, + FPUOp2::Add64 => 0b000_11110_01_1_00000_001010, + FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110, + FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110, + FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010, + FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010, + FPUOp2::Div32 => 0b000_11110_00_1_00000_000110, + FPUOp2::Div64 => 0b000_11110_01_1_00000_000110, + FPUOp2::Max32 => 0b000_11110_00_1_00000_010010, + FPUOp2::Max64 => 0b000_11110_01_1_00000_010010, + FPUOp2::Min32 => 0b000_11110_00_1_00000_010110, + FPUOp2::Min64 => 0b000_11110_01_1_00000_010110, + }; + sink.put4(enc_fpurrr(top22, rd, rn, rm)); + } + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { + let top17 = match fpu_op { + FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0, + FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0, + }; + sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); + } + &Inst::FpuCmp32 { rn, rm } => { + sink.put4(enc_fcmp(/* is32 = */ true, rn, rm)); + } + &Inst::FpuCmp64 { rn, rm } => { + sink.put4(enc_fcmp(/* is32 = */ false, rn, rm)); + } + &Inst::FpuToInt { op, rd, rn } => { + let top16 = match op { + // FCVTZS (32/32-bit) + FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000, + // FCVTZU (32/32-bit) + FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001, + // FCVTZS (32/64-bit) + FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000, + // FCVTZU (32/64-bit) + FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001, + // FCVTZS (64/32-bit) + FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000, + // FCVTZU (64/32-bit) + FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001, + // FCVTZS (64/64-bit) + FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000, + // FCVTZU (64/64-bit) + FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001, + }; + sink.put4(enc_fputoint(top16, rd, rn)); + } + &Inst::IntToFpu { op, rd, rn } => { + let top16 = match op { + // SCVTF (32/32-bit) + IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010, + // UCVTF (32/32-bit) + IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011, + // SCVTF (64/32-bit) + IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010, + // UCVTF (64/32-bit) + IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011, + // SCVTF (32/64-bit) + IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010, + // UCVTF (32/64-bit) + IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011, + // SCVTF (64/64-bit) + IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010, + // 
UCVTF (64/64-bit) + IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011, + }; + sink.put4(enc_inttofpu(top16, rd, rn)); + } + &Inst::LoadFpuConst32 { rd, const_data } => { + let inst = Inst::FpuLoad32 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(8), + }; + inst.emit(sink); + sink.put4(const_data.to_bits()); + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let inst = Inst::FpuLoad64 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink); + sink.put8(const_data.to_bits()); + } + &Inst::FpuCSel32 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ true)); + } + &Inst::FpuCSel64 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ false)); + } + &Inst::FpuRound { op, rd, rn } => { + let top22 = match op { + FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000, + FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000, + FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000, + FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000, + FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000, + FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000, + FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000, + FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000, + }; + sink.put4(enc_fround(top22, rd, rn)); + } + &Inst::MovToVec64 { rd, rn } => { + sink.put4( + 0b010_01110000_01000_0_0011_1_00000_00000 + | (machreg_to_gpr(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::MovFromVec64 { rd, rn } => { + sink.put4( + 0b010_01110000_01000_0_0111_1_00000_00000 + | (machreg_to_vec(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::VecRRR { rd, rn, rm, alu_op } => { + let (top11, bit15_10) = match alu_op { + VecALUOp::SQAddScalar => (0b010_11110_11_1, 0b000011), + VecALUOp::SQSubScalar => (0b010_11110_11_1, 0b001011), + VecALUOp::UQAddScalar => (0b011_11110_11_1, 0b000011), + VecALUOp::UQSubScalar => (0b011_11110_11_1, 0b001011), + }; + sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd)); + } + &Inst::MovToNZCV { rn } => { + sink.put4(0xd51b4200 | machreg_to_gpr(rn)); + } + &Inst::MovFromNZCV { rd } => { + sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); + } + &Inst::CondSet { rd, cond } => { + sink.put4( + 0b100_11010100_11111_0000_01_11111_00000 + | (cond.invert().bits() << 12) + | machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits >= 8 => { + let top22 = match (signed, from_bits, to_bits) { + (false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32) + (false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32) + (true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32) + (true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32) + // The 64-bit unsigned variants are the same as the 32-bit ones, + // because writes to Wn zero out the top 32 bits of Xn + (false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64) + (false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64) + (true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64) + (true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64) + // 32-to-64: the unsigned case is a 'mov' (special-cased below). 
+ (false, 32, 64) => 0, // MOV + (true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64) + _ => panic!( + "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}", + signed, from_bits, to_bits + ), + }; + if top22 != 0 { + sink.put4(enc_extend(top22, rd, rn)); + } else { + Inst::mov32(rd, rn).emit(sink); + } + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && signed => { + assert!(to_bits <= 64); + // Reduce sign-extend-from-1-bit to: + // - and rd, rn, #1 + // - sub rd, zr, rd + + // We don't have ImmLogic yet, so we just hardcode this. FIXME. + sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())); + let sub_inst = Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd, + rn: zero_reg(), + rm: rd.to_reg(), + }; + sub_inst.emit(sink); + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && !signed => { + assert!(to_bits <= 64); + // Reduce zero-extend-from-1-bit to: + // - and rd, rn, #1 + + // We don't have ImmLogic yet, so we just hardcode this. FIXME. + sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())); + } + &Inst::Extend { .. } => { + panic!("Unsupported extend variant"); + } + &Inst::Jump { ref dest } => { + // TODO: differentiate between as_off26() returning `None` for + // out-of-range vs. not-yet-finalized. The latter happens when we + // do early (fake) emission for size computation. + sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap())); + } + &Inst::Ret {} => { + sink.put4(0xd65f03c0); + } + &Inst::EpiloguePlaceholder {} => { + // Noop; this is just a placeholder for epilogues. + } + &Inst::Call { + ref dest, + loc, + opcode, + .. + } => { + sink.add_reloc(loc, Reloc::Arm64Call, dest, 0); + sink.put4(enc_jump26(0b100101, 0)); + if opcode.is_call() { + sink.add_call_site(loc, opcode); + } + } + &Inst::CallInd { + rn, loc, opcode, .. + } => { + sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)); + if opcode.is_call() { + sink.add_call_site(loc, opcode); + } + } + &Inst::CondBr { .. } => panic!("Unlowered CondBr during binemit!"), + &Inst::CondBrLowered { target, kind } => match kind { + // TODO: handle >2^19 case by emitting a compound sequence with + // an unconditional (26-bit) branch. We need branch-relaxation + // adjustment machinery to enable this (because we don't want to + // always emit the long form). + CondBrKind::Zero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_0, target.as_off19().unwrap(), reg)); + } + CondBrKind::NotZero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_1, target.as_off19().unwrap(), reg)); + } + CondBrKind::Cond(c) => { + sink.put4(enc_cbr( + 0b01010100, + target.as_off19().unwrap_or(0), + 0b0, + c.bits(), + )); + } + }, + &Inst::CondBrLoweredCompound { + taken, + not_taken, + kind, + } => { + // Conditional part first. + match kind { + CondBrKind::Zero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_0, taken.as_off19().unwrap(), reg)); + } + CondBrKind::NotZero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_1, taken.as_off19().unwrap(), reg)); + } + CondBrKind::Cond(c) => { + sink.put4(enc_cbr( + 0b01010100, + taken.as_off19().unwrap_or(0), + 0b0, + c.bits(), + )); + } + } + // Unconditional part. + sink.put4(enc_jump26(0b000101, not_taken.as_off26().unwrap_or(0))); + } + &Inst::IndirectBr { rn, .. 
} => { + sink.put4(enc_br(rn)); + } + &Inst::Nop => {} + &Inst::Nop4 => { + sink.put4(0xd503201f); + } + &Inst::Brk => { + sink.put4(0xd4200000); + } + &Inst::Udf { trap_info } => { + let (srcloc, code) = trap_info; + sink.add_trap(srcloc, code); + sink.put4(0xd4a00000); + } + &Inst::Adr { rd, ref label } => { + let off = memlabel_finalize(sink.cur_offset_from_start(), label); + assert!(off > -(1 << 20)); + assert!(off < (1 << 20)); + sink.put4(enc_adr(off, rd)); + } + &Inst::Word4 { data } => { + sink.put4(data); + } + &Inst::Word8 { data } => { + sink.put8(data); + } + &Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + ref targets, + .. + } => { + // This sequence is *one* instruction in the vcode, and is expanded only here at + // emission time, because we cannot allow the regalloc to insert spills/reloads in + // the middle; we depend on hardcoded PC-rel addressing below. + // + // N.B.: if PC-rel addressing on ADR below is changed, also update + // `Inst::with_block_offsets()` in arm64/inst/mod.rs. + + // Save index in a tmp (the live range of ridx only goes to start of this + // sequence; rtmp1 or rtmp2 may overwrite it). + let inst = Inst::gen_move(rtmp2, ridx, I64); + inst.emit(sink); + // Load address of jump table + let inst = Inst::Adr { + rd: rtmp1, + label: MemLabel::PCRel(16), + }; + inst.emit(sink); + // Load value out of jump table + let inst = Inst::SLoad32 { + rd: rtmp2, + mem: MemArg::reg_reg_scaled_extended( + rtmp1.to_reg(), + rtmp2.to_reg(), + I32, + ExtendOp::UXTW, + ), + srcloc: None, // can't cause a user trap. + }; + inst.emit(sink); + // Add base of jump table to jump-table-sourced block offset + let inst = Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: rtmp1, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + }; + inst.emit(sink); + // Branch to computed address. (`targets` here is only used for successor queries + // and is not needed for emission.) + let inst = Inst::IndirectBr { + rn: rtmp1.to_reg(), + targets: vec![], + }; + inst.emit(sink); + // Emit jump table (table of 32-bit offsets). + for target in targets { + let off = target.as_offset_words() * 4; + let off = off as i32 as u32; + sink.put4(off); + } + } + &Inst::LoadConst64 { rd, const_data } => { + let inst = Inst::ULoad64 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, // can't cause a user trap. + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink); + sink.put8(const_data); + } + &Inst::LoadExtName { + rd, + ref name, + offset, + srcloc, + } => { + let inst = Inst::ULoad64 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, // can't cause a user trap. + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink); + sink.add_reloc(srcloc, Reloc::Abs8, name, offset); + sink.put8(0); + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::isa::test_utils; + + #[test] + fn test_arm64_binemit() { + let mut insns = Vec::<(Inst, &str, &str)>::new(); + + // N.B.: the architecture is little-endian, so when transcribing the 32-bit + // hex instructions from e.g. objdump disassembly, one must swap the bytes + // seen below. (E.g., a `ret` is normally written as the u32 `D65F03C0`, + // but we write it here as C0035FD6.) 
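+    // As a quick sanity check of that byte order (an illustrative sketch; this
+    // helper is hypothetical and not defined in this file), the expected hex
+    // strings can be derived from the 32-bit instruction word like so:
+    //
+    //     fn enc_hex(word: u32) -> String {
+    //         word.to_le_bytes().iter().map(|b| format!("{:02X}", b)).collect()
+    //     }
+    //     assert_eq!(enc_hex(0xD65F03C0), "C0035FD6"); // `ret`
+    //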
+ + // Useful helper script to produce the encodings from the text: + // + // #!/bin/sh + // tmp=`mktemp /tmp/XXXXXXXX.o` + // aarch64-linux-gnu-as /dev/stdin -o $tmp + // aarch64-linux-gnu-objdump -d $tmp + // rm -f $tmp + // + // Then: + // + // $ echo "mov x1, x2" | arm64inst.sh + insns.push((Inst::Ret {}, "C0035FD6", "ret")); + insns.push((Inst::Nop {}, "", "nop-zero-len")); + insns.push((Inst::Nop4 {}, "1F2003D5", "nop")); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100030B", + "add w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400068B", + "add x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100034B", + "sub w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006CB", + "sub x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100032A", + "orr w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006AA", + "orr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100030A", + "and w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400068A", + "and x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubS32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100036B", + "subs w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006EB", + "subs x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddS32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100032B", + "adds w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddS64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006AB", + "adds x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SDiv64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40CC69A", + "sdiv x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::UDiv64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A408C69A", + "udiv x4, x5, x6", + )); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Eor32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400064A", + "eor w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Eor64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006CA", + "eor x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400260A", + "bic w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400268A", + "bic x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400262A", + "orn w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40026AA", + "orn x4, x5, x6", + 
)); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::EorNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400264A", + "eon w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::EorNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40026CA", + "eon x4, x5, x6", + )); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::RotR32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A42CC61A", + "ror w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::RotR64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A42CC69A", + "ror x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsr32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A424C61A", + "lsr w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A424C69A", + "lsr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Asr32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A428C61A", + "asr w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Asr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A428C69A", + "asr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsl32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A420C61A", + "lsl w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsl64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A420C69A", + "lsl x4, x5, x6", + )); + + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0411", + "add w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: true, + }, + }, + "078D4411", + "add w7, w8, #1191936", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0491", + "add x7, x8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Sub32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0451", + "sub w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Sub64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D04D1", + "sub x7, x8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::SubS32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0471", + "subs w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::SubS64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D04F1", + "subs x7, x8, #291", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + rm: xreg(9), + extendop: ExtendOp::SXTB, + }, + "0781290B", + "add w7, w8, w9, SXTB", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Add64, + rd: writable_xreg(15), + rn: xreg(16), + rm: xreg(17), + extendop: ExtendOp::UXTB, + }, + "0F02318B", + "add x15, x16, x17, UXTB", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Sub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + extendop: ExtendOp::SXTH, + }, + "41A0234B", + "sub 
w1, w2, w3, SXTH", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Sub64, + rd: writable_xreg(20), + rn: xreg(21), + rm: xreg(22), + extendop: ExtendOp::UXTW, + }, + "B44236CB", + "sub x20, x21, x22, UXTW", + )); + + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Add32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(20).unwrap(), + ), + }, + "6A510C0B", + "add w10, w11, w12, LSL 20", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Add64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::ASR, + ShiftOpShiftImm::maybe_from_shift(42).unwrap(), + ), + }, + "6AA98C8B", + "add x10, x11, x12, ASR 42", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Sub32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C4B", + "sub w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Sub64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CCB", + "sub x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Orr32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C2A", + "orr w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Orr64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CAA", + "orr x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::And32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C0A", + "and w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::And64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C8A", + "and x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Eor32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C4A", + "eor w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Eor64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CCA", + "eor x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::OrrNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C2A", + "orn w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::OrrNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2CAA", + "orn x10, x11, x12, LSL 23", + )); + 
insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AndNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C0A", + "bic w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AndNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C8A", + "bic x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::EorNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C4A", + "eon w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::EorNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2CCA", + "eon x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AddS32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C2B", + "adds w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AddS64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CAB", + "adds x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::SubS32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C6B", + "subs w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::SubS64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CEB", + "subs x10, x11, x12, LSL 23", + )); + + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MAdd32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110031B", + "madd w1, w2, w3, w4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MAdd64, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110039B", + "madd x1, x2, x3, x4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MSub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4190031B", + "msub w1, w2, w3, w4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MSub64, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4190039B", + "msub x1, x2, x3, x4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::SMulH, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: zero_reg(), + }, + "417C439B", + "smulh x1, x2, x3", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::UMulH, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: zero_reg(), + }, + "417CC39B", + "umulh x1, x2, x3", + )); + + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::RotR32, + rd: writable_xreg(20), + rn: xreg(21), + immshift: ImmShift::maybe_from_u64(19).unwrap(), + }, + "B44E9513", + "ror w20, w21, #19", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: 
ALUOp::RotR64, + rd: writable_xreg(20), + rn: xreg(21), + immshift: ImmShift::maybe_from_u64(42).unwrap(), + }, + "B4AAD593", + "ror x20, x21, #42", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsr32, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(13).unwrap(), + }, + "6A7D0D53", + "lsr w10, w11, #13", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsr64, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(57).unwrap(), + }, + "6AFD79D3", + "lsr x10, x11, #57", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Asr32, + rd: writable_xreg(4), + rn: xreg(5), + immshift: ImmShift::maybe_from_u64(7).unwrap(), + }, + "A47C0713", + "asr w4, w5, #7", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Asr64, + rd: writable_xreg(4), + rn: xreg(5), + immshift: ImmShift::maybe_from_u64(35).unwrap(), + }, + "A4FC6393", + "asr x4, x5, #35", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl32, + rd: writable_xreg(8), + rn: xreg(9), + immshift: ImmShift::maybe_from_u64(24).unwrap(), + }, + "281D0853", + "lsl w8, w9, #24", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl64, + rd: writable_xreg(8), + rn: xreg(9), + immshift: ImmShift::maybe_from_u64(63).unwrap(), + }, + "280141D3", + "lsl x8, x9, #63", + )); + + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::And32, + rd: writable_xreg(21), + rn: xreg(27), + imml: ImmLogic::maybe_from_u64(0x80003fff, I32).unwrap(), + }, + "753B0112", + "and w21, w27, #2147500031", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::And64, + rd: writable_xreg(7), + rn: xreg(6), + imml: ImmLogic::maybe_from_u64(0x3fff80003fff800, I64).unwrap(), + }, + "C7381592", + "and x7, x6, #288221580125796352", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Orr32, + rd: writable_xreg(1), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x100000, I32).unwrap(), + }, + "A1000C32", + "orr w1, w5, #1048576", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Orr64, + rd: writable_xreg(4), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(), + }, + "A4C401B2", + "orr x4, x5, #9331882296111890817", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Eor32, + rd: writable_xreg(1), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x00007fff, I32).unwrap(), + }, + "A1380052", + "eor w1, w5, #32767", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Eor64, + rd: writable_xreg(10), + rn: xreg(8), + imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(), + }, + "0AC501D2", + "eor x10, x8, #9331882296111890817", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::RBit32, + rd: writable_xreg(1), + rn: xreg(10), + }, + "4101C05A", + "rbit w1, w10", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::RBit64, + rd: writable_xreg(1), + rn: xreg(10), + }, + "4101C0DA", + "rbit x1, x10", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Clz32, + rd: writable_xreg(15), + rn: xreg(3), + }, + "6F10C05A", + "clz w15, w3", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Clz64, + rd: writable_xreg(15), + rn: xreg(3), + }, + "6F10C0DA", + "clz x15, x3", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Cls32, + rd: writable_xreg(21), + rn: xreg(16), + }, + "1516C05A", + "cls w21, w16", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Cls64, + rd: writable_xreg(21), + rn: xreg(16), + }, + "1516C0DA", + "cls x21, x16", + )); + + insns.push(( + Inst::ULoad8 { + 
rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41004038", + "ldurb w1, [x2]", + )); + insns.push(( + Inst::ULoad8 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)), + srcloc: None, + }, + "41004039", + "ldrb w1, [x2]", + )); + insns.push(( + Inst::ULoad8 { + rd: writable_xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(5)), + srcloc: None, + }, + "41686538", + "ldrb w1, [x2, x5]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41008038", + "ldursb x1, [x2]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()), + srcloc: None, + }, + "41FC8039", + "ldrsb x1, [x2, #63]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(5)), + srcloc: None, + }, + "4168A538", + "ldrsb x1, [x2, x5]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()), + srcloc: None, + }, + "41504078", + "ldurh w1, [x2, #5]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()), + srcloc: None, + }, + "41104079", + "ldrh w1, [x2, #8]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(3), I16), + srcloc: None, + }, + "41786378", + "ldrh w1, [x2, x3, LSL #1]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41008078", + "ldursh x1, [x2]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(28), + mem: MemArg::UnsignedOffset( + xreg(20), + UImm12Scaled::maybe_from_i64(24, I16).unwrap(), + ), + srcloc: None, + }, + "9C328079", + "ldrsh x28, [x20, #24]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(28), + mem: MemArg::RegScaled(xreg(20), xreg(20), I16), + srcloc: None, + }, + "9C7AB478", + "ldrsh x28, [x20, x20, LSL #1]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410040B8", + "ldur w1, [x2]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(12), + mem: MemArg::UnsignedOffset( + xreg(0), + UImm12Scaled::maybe_from_i64(204, I32).unwrap(), + ), + srcloc: None, + }, + "0CCC40B9", + "ldr w12, [x0, #204]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(12), I32), + srcloc: None, + }, + "41786CB8", + "ldr w1, [x2, x12, LSL #2]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410080B8", + "ldursw x1, [x2]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(12), + mem: MemArg::UnsignedOffset( + xreg(1), + UImm12Scaled::maybe_from_i64(16380, I32).unwrap(), + ), + srcloc: None, + }, + "2CFCBFB9", + "ldrsw x12, [x1, #16380]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(5), xreg(1), I32), + srcloc: None, + }, + "A178A1B8", + "ldrsw x1, [x5, x1, LSL #2]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410040F8", + "ldur x1, [x2]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: 
MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()), + srcloc: None, + }, + "410050F8", + "ldur x1, [x2, #-256]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()), + srcloc: None, + }, + "41F04FF8", + "ldur x1, [x2, #255]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(32760, I64).unwrap(), + ), + srcloc: None, + }, + "41FC7FF9", + "ldr x1, [x2, #32760]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(3)), + srcloc: None, + }, + "416863F8", + "ldr x1, [x2, x3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(3), I64), + srcloc: None, + }, + "417863F8", + "ldr x1, [x2, x3, LSL #3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW), + srcloc: None, + }, + "41D863F8", + "ldr x1, [x2, w3, SXTW #3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::Label(MemLabel::PCRel(64)), + srcloc: None, + }, + "01020058", + "ldr x1, pc+64", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410C41F8", + "ldr x1, [x2, #16]!", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410441F8", + "ldr x1, [x2], #16", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(32768), + srcloc: None, + }, + "0F0090D2EF011D8BE10140F9", + "movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(-32768), + srcloc: None, + }, + "EFFF8F92EF011D8BE10140F9", + "movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(1048576), // 2^20 + srcloc: None, + }, + "0F02A0D2EF011D8BE10140F9", + "movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1 + srcloc: None, + }, + "2F0080D20F02A0F2EF011D8BE10140F9", + "movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", + )); + + insns.push(( + Inst::Store8 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41000038", + "sturb w1, [x2]", + )); + insns.push(( + Inst::Store8 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(4095, I8).unwrap(), + ), + srcloc: None, + }, + "41FC3F39", + "strb w1, [x2, #4095]", + )); + insns.push(( + Inst::Store16 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41000078", + "sturh w1, [x2]", + )); + insns.push(( + Inst::Store16 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(8190, I16).unwrap(), + ), + srcloc: None, + }, + "41FC3F79", + "strh w1, [x2, #8190]", + )); + insns.push(( + Inst::Store32 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410000B8", + "stur w1, [x2]", + )); + insns.push(( + Inst::Store32 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + 
UImm12Scaled::maybe_from_i64(16380, I32).unwrap(), + ), + srcloc: None, + }, + "41FC3FB9", + "str w1, [x2, #16380]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410000F8", + "stur x1, [x2]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(32760, I64).unwrap(), + ), + srcloc: None, + }, + "41FC3FF9", + "str x1, [x2, #32760]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(3)), + srcloc: None, + }, + "416823F8", + "str x1, [x2, x3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(3), I64), + srcloc: None, + }, + "417823F8", + "str x1, [x2, x3, LSL #3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW), + srcloc: None, + }, + "415823F8", + "str x1, [x2, w3, UXTW #3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410C01F8", + "str x1, [x2, #16]!", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410401F8", + "str x1, [x2], #16", + )); + + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + }, + "482500A9", + "stp x8, x9, [x10]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "48A51FA9", + "stp x8, x9, [x10, #504]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "48253CA9", + "stp x8, x9, [x10, #-64]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(21), + rt2: xreg(28), + mem: PairMemArg::SignedOffset( + xreg(1), + SImm7Scaled::maybe_from_i64(-512, I64).unwrap(), + ), + }, + "357020A9", + "stp x21, x28, [x1, #-512]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::PreIndexed( + writable_xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "4825BCA9", + "stp x8, x9, [x10, #-64]!", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(15), + rt2: xreg(16), + mem: PairMemArg::PostIndexed( + writable_xreg(20), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "8FC29FA8", + "stp x15, x16, [x20], #504", + )); + + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + }, + "482540A9", + "ldp x8, x9, [x10]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "48A55FA9", + "ldp x8, x9, [x10, #504]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "48257CA9", + "ldp x8, x9, [x10, #-64]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(-512, I64).unwrap(), + ), + 
}, + "482560A9", + "ldp x8, x9, [x10, #-512]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::PreIndexed( + writable_xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "4825FCA9", + "ldp x8, x9, [x10, #-64]!", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(25), + mem: PairMemArg::PostIndexed( + writable_xreg(12), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "88E5DFA8", + "ldp x8, x25, [x12], #504", + )); + + insns.push(( + Inst::Mov { + rd: writable_xreg(8), + rm: xreg(9), + }, + "E80309AA", + "mov x8, x9", + )); + insns.push(( + Inst::Mov32 { + rd: writable_xreg(8), + rm: xreg(9), + }, + "E803092A", + "mov w8, w9", + )); + + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "E8FF9FD2", + "movz x8, #65535", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "E8FFBFD2", + "movz x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "E8FFDFD2", + "movz x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "E8FFFFD2", + "movz x8, #65535, LSL #48", + )); + + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "E8FF9F92", + "movn x8, #65535", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "E8FFBF92", + "movn x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "E8FFDF92", + "movn x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "E8FFFF92", + "movn x8, #65535, LSL #48", + )); + + insns.push(( + Inst::MovK { + rd: writable_xreg(12), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(), + }, + "0C0080F2", + "movk x12, #0", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(19), + imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(), + }, + "1300A0F2", + "movk x19, #0, LSL #16", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(3), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "E3FF9FF2", + "movk x3, #65535", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "E8FFBFF2", + "movk x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "E8FFDFF2", + "movk x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "E8FFFFF2", + "movk x8, #65535, LSL #48", + )); + + insns.push(( + Inst::CSel { + rd: writable_xreg(10), + rn: xreg(12), + rm: xreg(14), + cond: Cond::Hs, + }, + "8A218E9A", + "csel x10, x12, x14, hs", + )); + insns.push(( + Inst::CSet { + rd: writable_xreg(15), + cond: Cond::Ge, + }, + "EFB79F9A", + "cset x15, ge", + )); + insns.push(( + Inst::MovToVec64 { + rd: 
writable_vreg(20), + rn: xreg(21), + }, + "B41E084E", + "mov v20.d[0], x21", + )); + insns.push(( + Inst::MovFromVec64 { + rd: writable_xreg(21), + rn: vreg(20), + }, + "953E084E", + "mov x21, v20.d[0]", + )); + insns.push(( + Inst::MovToNZCV { rn: xreg(13) }, + "0D421BD5", + "msr nzcv, x13", + )); + insns.push(( + Inst::MovFromNZCV { + rd: writable_xreg(27), + }, + "1B423BD5", + "mrs x27, nzcv", + )); + insns.push(( + Inst::CondSet { + rd: writable_xreg(5), + cond: Cond::Hi, + }, + "E5979F9A", + "cset x5, hi", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::UQAddScalar, + }, + "D50EF77E", + "uqadd d21, d22, d23", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::SQAddScalar, + }, + "D50EF75E", + "sqadd d21, d22, d23", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::UQSubScalar, + }, + "D52EF77E", + "uqsub d21, d22, d23", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::SQSubScalar, + }, + "D52EF75E", + "sqsub d21, d22, d23", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 8, + to_bits: 32, + }, + "411C0053", + "uxtb w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 8, + to_bits: 32, + }, + "411C0013", + "sxtb w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 16, + to_bits: 32, + }, + "413C0053", + "uxth w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 16, + to_bits: 32, + }, + "413C0013", + "sxth w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 8, + to_bits: 64, + }, + "411C0053", + "uxtb x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 8, + to_bits: 64, + }, + "411C4093", + "sxtb x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 16, + to_bits: 64, + }, + "413C0053", + "uxth x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 16, + to_bits: 64, + }, + "413C4093", + "sxth x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 32, + to_bits: 64, + }, + "E103022A", + "mov w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 32, + to_bits: 64, + }, + "417C4093", + "sxtw x1, w2", + )); + + insns.push(( + Inst::Jump { + dest: BranchTarget::ResolvedOffset(64), + }, + "10000014", + "b 64", + )); + + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Zero(xreg(8)), + }, + "080200B4", + "cbz x8, 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::NotZero(xreg(8)), + }, + "080200B5", + "cbnz x8, 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Eq), + }, + "00020054", + "b.eq 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Ne), + }, + "01020054", + 
"b.ne 64", + )); + + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Hs), + }, + "02020054", + "b.hs 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Lo), + }, + "03020054", + "b.lo 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Mi), + }, + "04020054", + "b.mi 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Pl), + }, + "05020054", + "b.pl 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Vs), + }, + "06020054", + "b.vs 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Vc), + }, + "07020054", + "b.vc 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Hi), + }, + "08020054", + "b.hi 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Ls), + }, + "09020054", + "b.ls 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Ge), + }, + "0A020054", + "b.ge 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Lt), + }, + "0B020054", + "b.lt 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Gt), + }, + "0C020054", + "b.gt 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Le), + }, + "0D020054", + "b.le 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Al), + }, + "0E020054", + "b.al 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Nv), + }, + "0F020054", + "b.nv 64", + )); + + insns.push(( + Inst::CondBrLoweredCompound { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + kind: CondBrKind::Cond(Cond::Le), + }, + "0D02005420000014", + "b.le 64 ; b 128", + )); + + insns.push(( + Inst::Call { + dest: ExternalName::testcase("test0"), + uses: Set::empty(), + defs: Set::empty(), + loc: SourceLoc::default(), + opcode: Opcode::Call, + }, + "00000094", + "bl 0", + )); + + insns.push(( + Inst::CallInd { + rn: xreg(10), + uses: Set::empty(), + defs: Set::empty(), + loc: SourceLoc::default(), + opcode: Opcode::CallIndirect, + }, + "40013FD6", + "blr x10", + )); + + insns.push(( + Inst::IndirectBr { + rn: xreg(3), + targets: vec![1, 2, 3], + }, + "60001FD6", + "br x3", + )); + + insns.push((Inst::Brk, "000020D4", "brk #0")); + + insns.push(( + Inst::Adr { + rd: writable_xreg(15), + label: MemLabel::PCRel((1 << 20) - 4), + }, + "EFFF7F10", + "adr x15, pc+1048572", + )); + + insns.push(( + Inst::FpuMove64 { + rd: writable_vreg(8), + rn: vreg(4), + }, + "881CA40E", + "mov v8.8b, v4.8b", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3201E", + "fabs s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3601E", + "fabs 
d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43211E", + "fneg s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43611E", + "fneg d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3211E", + "fsqrt s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3611E", + "fsqrt d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt32To64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3221E", + "fcvt d15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt64To32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43621E", + "fcvt s15, d30", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF2B3F1E", + "fadd s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF2B7F1E", + "fadd d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF3B3F1E", + "fsub s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF3B7F1E", + "fsub d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF0B3F1E", + "fmul s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF0B7F1E", + "fmul d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF1B3F1E", + "fdiv s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF1B7F1E", + "fdiv d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Max32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF4B3F1E", + "fmax s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Max64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF4B7F1E", + "fmax d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Min32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF5B3F1E", + "fmin s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Min64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF5B7F1E", + "fmin d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + ra: vreg(1), + }, + "CF071F1F", + "fmadd s15, s30, s31, s1", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + ra: vreg(1), + }, + "CF075F1F", + "fmadd d15, d30, d31, d1", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100391E", + "fcvtzu w1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100399E", + "fcvtzu x1, s4", + )); + + insns.push(( + 
Inst::FpuToInt { + op: FpuToIntOp::F32ToI32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100381E", + "fcvtzs w1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100389E", + "fcvtzs x1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100791E", + "fcvtzu w1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100799E", + "fcvtzu x1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100781E", + "fcvtzs w1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100789E", + "fcvtzs x1, d4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100231E", + "ucvtf s1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100221E", + "scvtf s1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100631E", + "ucvtf d1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100621E", + "scvtf d1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100239E", + "ucvtf s1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100229E", + "scvtf s1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100639E", + "ucvtf d1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100629E", + "scvtf d1, x4", + )); + + insns.push(( + Inst::FpuCmp32 { + rn: vreg(23), + rm: vreg(24), + }, + "E022381E", + "fcmp s23, s24", + )); + + insns.push(( + Inst::FpuCmp64 { + rn: vreg(23), + rm: vreg(24), + }, + "E022781E", + "fcmp d23, d24", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), F32), + srcloc: None, + }, + "107969BC", + "ldr s16, [x8, x9, LSL #2]", + )); + + insns.push(( + Inst::FpuLoad64 { + rd: writable_vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), F64), + srcloc: None, + }, + "107969FC", + "ldr d16, [x8, x9, LSL #3]", + )); + + insns.push(( + Inst::FpuLoad128 { + rd: writable_vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), I128), + srcloc: None, + }, + "1079E93C", + "ldr q16, [x8, x9, LSL #4]", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_vreg(16), + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }, + "5000001C", + "ldr s16, pc+8", + )); + + insns.push(( + Inst::FpuLoad64 { + rd: writable_vreg(16), + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }, + "5000005C", + "ldr d16, pc+8", + )); + + insns.push(( + Inst::FpuLoad128 { + rd: writable_vreg(16), + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }, + "5000009C", + "ldr q16, pc+8", + )); + + insns.push(( + Inst::FpuStore32 { + rd: vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), F32), + srcloc: None, + }, + "107929BC", + "str s16, [x8, x9, LSL #2]", + )); + + insns.push(( + Inst::FpuStore64 { + rd: vreg(16), + mem: 
MemArg::RegScaled(xreg(8), xreg(9), F64), + srcloc: None, + }, + "107929FC", + "str d16, [x8, x9, LSL #3]", + )); + + insns.push(( + Inst::FpuStore128 { + rd: vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), I128), + srcloc: None, + }, + "1079A93C", + "str q16, [x8, x9, LSL #4]", + )); + + insns.push(( + Inst::LoadFpuConst32 { + rd: writable_vreg(16), + const_data: 1.0, + }, + "5000001C020000140000803F", + "ldr s16, pc+8 ; b 8 ; data.f32 1", + )); + + insns.push(( + Inst::LoadFpuConst64 { + rd: writable_vreg(16), + const_data: 1.0, + }, + "5000005C03000014000000000000F03F", + "ldr d16, pc+8 ; b 12 ; data.f64 1", + )); + + insns.push(( + Inst::FpuCSel32 { + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(3), + cond: Cond::Hi, + }, + "418C231E", + "fcsel s1, s2, s3, hi", + )); + + insns.push(( + Inst::FpuCSel64 { + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(3), + cond: Cond::Eq, + }, + "410C631E", + "fcsel d1, d2, d3, eq", + )); + + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Minus32, + }, + "1743251E", + "frintm s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Minus64, + }, + "1743651E", + "frintm d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Plus32, + }, + "17C3241E", + "frintp s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Plus64, + }, + "17C3641E", + "frintp d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Zero32, + }, + "17C3251E", + "frintz s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Zero64, + }, + "17C3651E", + "frintz d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Nearest32, + }, + "1743241E", + "frintn s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Nearest64, + }, + "1743641E", + "frintn d23, d24", + )); + + let rru = create_reg_universe(); + for (insn, expected_encoding, expected_printing) in insns { + println!( + "ARM64: {:?}, {}, {}", + insn, expected_encoding, expected_printing + ); + + // Check the printed text is as expected. + let actual_printing = insn.show_rru(Some(&rru)); + assert_eq!(expected_printing, actual_printing); + + // Check the encoding is as expected. 
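+        // Note on the expected-encoding strings above (inferred from the vectors
+        // themselves): each one is the emitted byte sequence rendered as hex, i.e.
+        // the 32-bit instruction word in little-endian byte order. For example,
+        // `brk #0` is the word 0xD4200000 and appears as "000020D4", and `b 64`
+        // (word 0x14000010, imm26 = 16 words = 64 bytes) appears as "10000014".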
+ let text_size = { + let mut code_sec = MachSectionSize::new(0); + insn.emit(&mut code_sec); + code_sec.size() + }; + + let mut sink = test_utils::TestCodeSink::new(); + let mut sections = MachSections::new(); + let code_idx = sections.add_section(0, text_size); + let code_sec = sections.get_section(code_idx); + insn.emit(code_sec); + sections.emit(&mut sink); + let actual_encoding = &sink.stringify(); + assert_eq!(expected_encoding, actual_encoding); + } + } + + #[test] + fn test_cond_invert() { + for cond in vec![ + Cond::Eq, + Cond::Ne, + Cond::Hs, + Cond::Lo, + Cond::Mi, + Cond::Pl, + Cond::Vs, + Cond::Vc, + Cond::Hi, + Cond::Ls, + Cond::Ge, + Cond::Lt, + Cond::Gt, + Cond::Le, + Cond::Al, + Cond::Nv, + ] + .into_iter() + { + assert_eq!(cond.invert().invert(), cond); + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/imms.rs b/cranelift/codegen/src/isa/arm64/inst/imms.rs new file mode 100644 index 0000000000..eda68af7b1 --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/imms.rs @@ -0,0 +1,753 @@ +//! ARM64 ISA definitions: immediate constants. + +#![allow(dead_code)] +#![allow(non_snake_case)] + +use crate::ir::types::*; +use crate::ir::Type; +use crate::machinst::*; + +use regalloc::RealRegUniverse; + +use core::convert::TryFrom; +use std::string::String; + +/// A signed, scaled 7-bit offset. +#[derive(Clone, Copy, Debug)] +pub struct SImm7Scaled { + /// The value. + pub value: i16, + /// multiplied by the size of this type + pub scale_ty: Type, +} + +impl SImm7Scaled { + /// Create a SImm7Scaled from a raw offset and the known scale type, if + /// possible. + pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option { + assert!(scale_ty == I64 || scale_ty == I32); + let scale = scale_ty.bytes(); + assert!(scale.is_power_of_two()); + let scale = scale as i64; + let upper_limit = 63 * scale; + let lower_limit = -(64 * scale); + if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 { + Some(SImm7Scaled { + value: value as i16, + scale_ty, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero(scale_ty: Type) -> SImm7Scaled { + SImm7Scaled { value: 0, scale_ty } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + ((self.value / self.scale_ty.bytes() as i16) as u32) & 0x7f + } +} + +/// a 9-bit signed offset. +#[derive(Clone, Copy, Debug)] +pub struct SImm9 { + /// The value. + pub value: i16, +} + +impl SImm9 { + /// Create a signed 9-bit offset from a full-range value, if possible. + pub fn maybe_from_i64(value: i64) -> Option { + if value >= -256 && value <= 255 { + Some(SImm9 { + value: value as i16, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> SImm9 { + SImm9 { value: 0 } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + (self.value as u32) & 0x1ff + } +} + +/// An unsigned, scaled 12-bit offset. +#[derive(Clone, Copy, Debug)] +pub struct UImm12Scaled { + /// The value. + pub value: u16, + /// multiplied by the size of this type + pub scale_ty: Type, +} + +impl UImm12Scaled { + /// Create a UImm12Scaled from a raw offset and the known scale type, if + /// possible. 
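+    // For example (values exercised by the encoding tests): with `scale_ty == I64`
+    // the representable offsets are the multiples of 8 in 0..=32760 (4095 * 8), so
+    // `maybe_from_i64(32760, I64)` succeeds (used for `ldr x1, [x2, #32760]`),
+    // while an unaligned or out-of-range offset returns `None`.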
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option { + let scale = scale_ty.bytes(); + assert!(scale.is_power_of_two()); + let scale = scale as i64; + let limit = 4095 * scale; + if value >= 0 && value <= limit && (value & (scale - 1)) == 0 { + Some(UImm12Scaled { + value: value as u16, + scale_ty, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero(scale_ty: Type) -> UImm12Scaled { + UImm12Scaled { value: 0, scale_ty } + } + + /// Encoded bits. + pub fn bits(&self) -> u32 { + (self.value as u32 / self.scale_ty.bytes()) & 0xfff + } +} + +/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted +/// left by 0 or 12 places. +#[derive(Clone, Debug)] +pub struct Imm12 { + /// The immediate bits. + pub bits: usize, + /// Whether the immediate bits are shifted left by 12 or not. + pub shift12: bool, +} + +impl Imm12 { + /// Compute a Imm12 from raw bits, if possible. + pub fn maybe_from_u64(val: u64) -> Option { + if val == 0 { + Some(Imm12 { + bits: 0, + shift12: false, + }) + } else if val < 0xfff { + Some(Imm12 { + bits: val as usize, + shift12: false, + }) + } else if val < 0xfff_000 && (val & 0xfff == 0) { + Some(Imm12 { + bits: (val as usize) >> 12, + shift12: true, + }) + } else { + None + } + } + + /// Bits for 2-bit "shift" field in e.g. AddI. + pub fn shift_bits(&self) -> u8 { + if self.shift12 { + 0b01 + } else { + 0b00 + } + } + + /// Bits for 12-bit "imm" field in e.g. AddI. + pub fn imm_bits(&self) -> u16 { + self.bits as u16 + } +} + +/// An immediate for logical instructions. +#[derive(Clone, Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub struct ImmLogic { + /// The actual value. + value: u64, + /// `N` flag. + pub N: bool, + /// `S` field: element size and element bits. + pub R: u8, + /// `R` field: rotate amount. + pub S: u8, +} + +impl ImmLogic { + /// Compute an ImmLogic from raw bits, if possible. + pub fn maybe_from_u64(value: u64, ty: Type) -> Option { + // Note: This function is a port of VIXL's Assembler::IsImmLogical. + + if ty != I64 && ty != I32 { + return None; + } + + let original_value = value; + + let value = if ty == I32 { + // To handle 32-bit logical immediates, the very easiest thing is to repeat + // the input value twice to make a 64-bit word. The correct encoding of that + // as a logical immediate will also be the correct encoding of the 32-bit + // value. + + // Avoid making the assumption that the most-significant 32 bits are zero by + // shifting the value left and duplicating it. + let value = value << 32; + value | value >> 32 + } else { + value + }; + + // Logical immediates are encoded using parameters n, imm_s and imm_r using + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 bits + // are set. The pattern is rotated right by R, and repeated across a 32 or + // 64-bit value, depending on destination register width. + // + // Put another way: the basic format of a logical immediate is a single + // contiguous stretch of 1 bits, repeated across the whole word at intervals + // given by a power of 2. 
To identify them quickly, we first locate the + // lowest stretch of 1 bits, then the next 1 bit above that; that combination + // is different for every logical immediate, so it gives us all the + // information we need to identify the only logical immediate that our input + // could be, and then we simply check if that's the value we actually have. + // + // (The rotation parameter does give the possibility of the stretch of 1 bits + // going 'round the end' of the word. To deal with that, we observe that in + // any situation where that happens the bitwise NOT of the value is also a + // valid logical immediate. So we simply invert the input whenever its low bit + // is set, and then we know that the rotated case can't arise.) + let (value, inverted) = if value & 1 == 1 { + (!value, true) + } else { + (value, false) + }; + + if value == 0 { + return None; + } + + // The basic analysis idea: imagine our input word looks like this. + // + // 0011111000111110001111100011111000111110001111100011111000111110 + // c b a + // |<--d-->| + // + // We find the lowest set bit (as an actual power-of-2 value, not its index) + // and call it a. Then we add a to our original number, which wipes out the + // bottommost stretch of set bits and replaces it with a 1 carried into the + // next zero bit. Then we look for the new lowest set bit, which is in + // position b, and subtract it, so now our number is just like the original + // but with the lowest stretch of set bits completely gone. Now we find the + // lowest set bit again, which is position c in the diagram above. Then we'll + // measure the distance d between bit positions a and c (using CLZ), and that + // tells us that the only valid logical immediate that could possibly be equal + // to this number is the one in which a stretch of bits running from a to just + // below b is replicated every d bits. + fn lowest_set_bit(value: u64) -> u64 { + let bit = value.trailing_zeros(); + 1u64.checked_shl(bit).unwrap_or(0) + } + let a = lowest_set_bit(value); + assert_ne!(0, a); + let value_plus_a = value.wrapping_add(a); + let b = lowest_set_bit(value_plus_a); + let value_plus_a_minus_b = value_plus_a - b; + let c = lowest_set_bit(value_plus_a_minus_b); + + let (d, clz_a, out_n, mask) = if c != 0 { + // The general case, in which there is more than one stretch of set bits. + // Compute the repeat distance d, and set up a bitmask covering the basic + // unit of repetition (i.e. a word with the bottom d bits set). Also, in all + // of these cases the N bit of the output will be zero. + let clz_a = a.leading_zeros(); + let clz_c = c.leading_zeros(); + let d = clz_a - clz_c; + let mask = (1 << d) - 1; + (d, clz_a, 0, mask) + } else { + (64, a.leading_zeros(), 1, u64::max_value()) + }; + + // If the repeat period d is not a power of two, it can't be encoded. + if !d.is_power_of_two() { + return None; + } + + if ((b.wrapping_sub(a)) & !mask) != 0 { + // If the bit stretch (b - a) does not fit within the mask derived from the + // repeat period, then fail. + return None; + } + + // The only possible option is b - a repeated every d bits. Now we're going to + // actually construct the valid logical immediate derived from that + // specification, and see if it equals our original input. + // + // To repeat a value every d bits, we multiply it by a number of the form + // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can + // be derived using a table lookup on CLZ(d). 
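+        // Worked example (traced by hand from the steps above): for
+        // value = 0x1111_1111_1111_1111 the lowest set bit a is 1 and b is 2, so
+        // the basic unit b - a = 1 repeats with period d = 4. The table lookup
+        // gives MULTIPLIERS[CLZ(4) - 57] = 0x1111_1111_1111_1111, and
+        // 1 * 0x1111_1111_1111_1111 reproduces the input, so the value is
+        // encodable (the test below expects N = false, S = 56, R = 0 for it).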
+ const MULTIPLIERS: [u64; 6] = [ + 0x0000000000000001, + 0x0000000100000001, + 0x0001000100010001, + 0x0101010101010101, + 0x1111111111111111, + 0x5555555555555555, + ]; + let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize]; + let candidate = b.wrapping_sub(a) * multiplier; + + if value != candidate { + // The candidate pattern doesn't match our input value, so fail. + return None; + } + + // We have a match! This is a valid logical immediate, so now we have to + // construct the bits and pieces of the instruction encoding that generates + // it. + + // Count the set bits in our basic stretch. The special case of clz(0) == -1 + // makes the answer come out right for stretches that reach the very top of + // the word (e.g. numbers like 0xffffc00000000000). + let clz_b = if b == 0 { + u32::max_value() // -1 + } else { + b.leading_zeros() + }; + let s = clz_a.wrapping_sub(clz_b); + + // Decide how many bits to rotate right by, to put the low bit of that basic + // stretch in position a. + let (s, r) = if inverted { + // If we inverted the input right at the start of this function, here's + // where we compensate: the number of set bits becomes the number of clear + // bits, and the rotation count is based on position b rather than position + // a (since b is the location of the 'lowest' 1 bit after inversion). + // Need wrapping for when clz_b is max_value() (for when b == 0). + (d - s, clz_b.wrapping_add(1) & (d - 1)) + } else { + (s, (clz_a + 1) & (d - 1)) + }; + + // Now we're done, except for having to encode the S output in such a way that + // it gives both the number of set bits and the length of the repeated + // segment. The s field is encoded like this: + // + // imms size S + // ssssss 64 UInt(ssssss) + // 0sssss 32 UInt(sssss) + // 10ssss 16 UInt(ssss) + // 110sss 8 UInt(sss) + // 1110ss 4 UInt(ss) + // 11110s 2 UInt(s) + // + // So we 'or' (2 * -d) with our computed s to form imms. + let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f; + debug_assert!(u8::try_from(r).is_ok()); + debug_assert!(u8::try_from(s).is_ok()); + Some(ImmLogic { + value: original_value, + N: out_n != 0, + R: r as u8, + S: s as u8, + }) + } + + pub fn from_raw(value: u64, n: bool, r: u8, s: u8) -> ImmLogic { + ImmLogic { + N: n, + R: r, + S: s, + value, + } + } + + /// Returns bits ready for encoding: (N:1, R:6, S:6) + pub fn enc_bits(&self) -> u16 { + ((self.N as u16) << 12) | ((self.R as u16) << 6) | (self.S as u16) + } + + /// Returns the value that this immediate represents. + pub fn value(&self) -> u64 { + self.value + } + + /// Return an immediate for the bitwise-inverted value. + pub fn invert(&self) -> ImmLogic { + // For every ImmLogical immediate, the inverse can also be encoded. + Self::maybe_from_u64(!self.value, I64).unwrap() + } +} + +/// An immediate for shift instructions. +#[derive(Clone, Debug)] +pub struct ImmShift { + /// 6-bit shift amount. + pub imm: u8, +} + +impl ImmShift { + /// Create an ImmShift from raw bits, if possible. + pub fn maybe_from_u64(val: u64) -> Option { + if val < 64 { + Some(ImmShift { imm: val as u8 }) + } else { + None + } + } + + /// Get the immediate value. + pub fn value(&self) -> u8 { + self.imm + } +} + +/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift. +#[derive(Clone, Copy, Debug)] +pub struct MoveWideConst { + /// The value. + pub bits: u16, + /// shifted 16*shift bits to the left. + pub shift: u8, +} + +impl MoveWideConst { + /// Construct a MoveWideConst from an arbitrary 64-bit constant if possible. 
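+    // For example, 0xffff_0000_0000_0000 is representable as bits = 0xffff with
+    // shift = 3 (printed as `movz x8, #65535, LSL #48` in the encoding tests),
+    // while a constant spanning two 16-bit halves, such as 0x1_0001, is not
+    // representable and yields `None`.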
+ pub fn maybe_from_u64(value: u64) -> Option { + let mask0 = 0x0000_0000_0000_ffffu64; + let mask1 = 0x0000_0000_ffff_0000u64; + let mask2 = 0x0000_ffff_0000_0000u64; + let mask3 = 0xffff_0000_0000_0000u64; + + if value == (value & mask0) { + return Some(MoveWideConst { + bits: (value & mask0) as u16, + shift: 0, + }); + } + if value == (value & mask1) { + return Some(MoveWideConst { + bits: ((value >> 16) & mask0) as u16, + shift: 1, + }); + } + if value == (value & mask2) { + return Some(MoveWideConst { + bits: ((value >> 32) & mask0) as u16, + shift: 2, + }); + } + if value == (value & mask3) { + return Some(MoveWideConst { + bits: ((value >> 48) & mask0) as u16, + shift: 3, + }); + } + None + } + + pub fn maybe_with_shift(imm: u16, shift: u8) -> Option { + let shift_enc = shift / 16; + if shift_enc > 3 { + None + } else { + Some(MoveWideConst { + bits: imm, + shift: shift_enc, + }) + } + } + + /// Returns the value that this constant represents. + pub fn value(&self) -> u64 { + (self.bits as u64) << (16 * self.shift) + } +} + +impl ShowWithRRU for Imm12 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let shift = if self.shift12 { 12 } else { 0 }; + let value = self.bits << shift; + format!("#{}", value) + } +} + +impl ShowWithRRU for SImm7Scaled { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl ShowWithRRU for SImm9 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl ShowWithRRU for UImm12Scaled { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl ShowWithRRU for ImmLogic { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value()) + } +} + +impl ShowWithRRU for ImmShift { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.imm) + } +} + +impl ShowWithRRU for MoveWideConst { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + if self.shift == 0 { + format!("#{}", self.bits) + } else { + format!("#{}, LSL #{}", self.bits, self.shift * 16) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn imm_logical_test() { + assert_eq!(None, ImmLogic::maybe_from_u64(0, I64)); + assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64)); + + assert_eq!( + Some(ImmLogic { + value: 1, + N: true, + R: 0, + S: 0 + }), + ImmLogic::maybe_from_u64(1, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 2, + N: true, + R: 63, + S: 0 + }), + ImmLogic::maybe_from_u64(2, I64) + ); + + assert_eq!(None, ImmLogic::maybe_from_u64(5, I64)); + + assert_eq!(None, ImmLogic::maybe_from_u64(11, I64)); + + assert_eq!( + Some(ImmLogic { + value: 248, + N: true, + R: 61, + S: 4 + }), + ImmLogic::maybe_from_u64(248, I64) + ); + + assert_eq!(None, ImmLogic::maybe_from_u64(249, I64)); + + assert_eq!( + Some(ImmLogic { + value: 1920, + N: true, + R: 57, + S: 3 + }), + ImmLogic::maybe_from_u64(1920, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x7ffe, + N: true, + R: 63, + S: 13 + }), + ImmLogic::maybe_from_u64(0x7ffe, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x30000, + N: true, + R: 48, + S: 1 + }), + ImmLogic::maybe_from_u64(0x30000, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x100000, + N: true, + R: 44, + S: 0 + }), + ImmLogic::maybe_from_u64(0x100000, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: u64::max_value() - 1, + N: true, + R: 63, + S: 62 + }), + 
ImmLogic::maybe_from_u64(u64::max_value() - 1, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0xaaaaaaaaaaaaaaaa, + N: false, + R: 1, + S: 60 + }), + ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x8181818181818181, + N: false, + R: 1, + S: 49 + }), + ImmLogic::maybe_from_u64(0x8181818181818181, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0xffc3ffc3ffc3ffc3, + N: false, + R: 10, + S: 43 + }), + ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x100000001, + N: false, + R: 0, + S: 0 + }), + ImmLogic::maybe_from_u64(0x100000001, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x1111111111111111, + N: false, + R: 0, + S: 56 + }), + ImmLogic::maybe_from_u64(0x1111111111111111, I64) + ); + + for n in 0..2 { + let types = if n == 0 { vec![I64, I32] } else { vec![I64] }; + for s in 0..64 { + for r in 0..64 { + let imm = get_logical_imm(n, s, r); + for &ty in &types { + match ImmLogic::maybe_from_u64(imm, ty) { + Some(ImmLogic { value, .. }) => { + assert_eq!(imm, value); + ImmLogic::maybe_from_u64(!value, ty).unwrap(); + } + None => assert_eq!(0, imm), + }; + } + } + } + } + } + + // Repeat a value that has `width` bits, across a 64-bit value. + fn repeat(value: u64, width: u64) -> u64 { + let mut result = value & ((1 << width) - 1); + let mut i = width; + while i < 64 { + result |= result << i; + i *= 2; + } + result + } + + // Get the logical immediate, from the encoding N/R/S bits. + fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 { + // An integer is constructed from the n, imm_s and imm_r bits according to + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. The pattern is rotated right by R, and repeated across a + // 64-bit value. + + if n == 1 { + if s == 0x3f { + return 0; + } + let bits = (1u64 << (s + 1)) - 1; + bits.rotate_right(r) + } else { + if (s >> 1) == 0x1f { + return 0; + } + let mut width = 0x20; + while width >= 0x2 { + if (s & width) == 0 { + let mask = width - 1; + if (s & mask) == mask { + return 0; + } + let bits = (1u64 << ((s & mask) + 1)) - 1; + return repeat(bits.rotate_right(r & mask), width.into()); + } + width >>= 1; + } + unreachable!(); + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/mod.rs b/cranelift/codegen/src/isa/arm64/inst/mod.rs new file mode 100644 index 0000000000..ecc948cc70 --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/mod.rs @@ -0,0 +1,2515 @@ +//! This module defines arm64-specific machine instruction types. 
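+//!
+//! The submodules declared below (`regs`, `imms`, `args` and `emit`) are
+//! re-exported from this module: an instruction is built as an `Inst` variant,
+//! pretty-printed with `show_rru`, and encoded with `emit`, which is exactly the
+//! path the per-instruction encoding tests exercise.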
+ +#![allow(non_snake_case)] +#![allow(unused_imports)] +#![allow(non_camel_case_types)] +#![allow(dead_code)] + +use crate::binemit::CodeOffset; +use crate::ir::constant::{ConstantData, ConstantOffset}; +use crate::ir::types::{ + B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, +}; +use crate::ir::{ExternalName, GlobalValue, JumpTable, Opcode, SourceLoc, TrapCode, Type}; +use crate::machinst::*; + +use regalloc::Map as RegallocMap; +use regalloc::{ + RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, + NUM_REG_CLASSES, +}; +use regalloc::{RegUsageCollector, Set}; + +use alloc::vec::Vec; +use smallvec::{smallvec, SmallVec}; +use std::mem; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; + +//============================================================================= +// Instructions (top level): definition + +/// An ALU operation. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ALUOp { + Add32, + Add64, + Sub32, + Sub64, + Orr32, + Orr64, + OrrNot32, + OrrNot64, + And32, + And64, + AndNot32, + AndNot64, + Eor32, + Eor64, + EorNot32, + EorNot64, + AddS32, + AddS64, + SubS32, + SubS64, + MAdd32, // multiply-add + MAdd64, + MSub32, + MSub64, + SMulH, + UMulH, + SDiv64, + UDiv64, + RotR32, + RotR64, + Lsr32, + Lsr64, + Asr32, + Asr64, + Lsl32, + Lsl64, +} + +/// A floating-point unit (FPU) operation with one arg. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp1 { + Abs32, + Abs64, + Neg32, + Neg64, + Sqrt32, + Sqrt64, + Cvt32To64, + Cvt64To32, +} + +/// A floating-point unit (FPU) operation with two args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp2 { + Add32, + Add64, + Sub32, + Sub64, + Mul32, + Mul64, + Div32, + Div64, + Max32, + Max64, + Min32, + Min64, +} + +/// A floating-point unit (FPU) operation with three args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp3 { + MAdd32, + MAdd64, +} + +/// A conversion from an FP to an integer value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuToIntOp { + F32ToU32, + F32ToI32, + F32ToU64, + F32ToI64, + F64ToU32, + F64ToI32, + F64ToU64, + F64ToI64, +} + +/// A conversion from an integer to an FP value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum IntToFpuOp { + U32ToF32, + I32ToF32, + U32ToF64, + I32ToF64, + U64ToF32, + I64ToF32, + U64ToF64, + I64ToF64, +} + +/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to +/// nearest, and for 32- or 64-bit FP values. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuRoundMode { + Minus32, + Minus64, + Plus32, + Plus64, + Zero32, + Zero64, + Nearest32, + Nearest64, +} + +/// A vector ALU operation. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecALUOp { + SQAddScalar, // signed saturating add + UQAddScalar, // unsigned saturating add + SQSubScalar, // signed saturating subtract + UQSubScalar, // unsigned saturating subtract +} + +/// An operation on the bits of a register. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. 
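As an illustration of the "paired with several instruction formats" point above, a single ALU opcode such as `ALUOp::Add64` can appear in any of the register/immediate forms of `Inst` defined below (register names here are arbitrary placeholders, and the assembly shown is roughly what each form corresponds to):

    //   AluRRR       { alu_op: Add64, rd, rn, rm }            // add x0, x1, x2
    //   AluRRImm12   { alu_op: Add64, rd, rn, imm12 }         // add x0, x1, #16
    //   AluRRRShift  { alu_op: Add64, rd, rn, rm, shiftop }   // add x0, x1, x2, LSL #3
    //   AluRRRExtend { alu_op: Add64, rd, rn, rm, extendop }  // add x0, x1, w2, UXTW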
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum BitOp { + RBit32, + RBit64, + Clz32, + Clz64, + Cls32, + Cls64, +} + +impl BitOp { + /// Is the opcode a 32-bit operation. + pub fn is_32_bit(&self) -> bool { + match self { + BitOp::RBit32 => true, + BitOp::Clz32 => true, + BitOp::Cls32 => true, + _ => false, + } + } + + /// Get the assembly mnemonic for this opcode. + pub fn op_str(&self) -> &'static str { + match self { + BitOp::RBit32 | BitOp::RBit64 => "rbit", + BitOp::Clz32 | BitOp::Clz64 => "clz", + BitOp::Cls32 | BitOp::Cls64 => "cls", + } + } +} + +impl From<(Opcode, Type)> for BitOp { + /// Get the BitOp from the IR opcode. + fn from(op_ty: (Opcode, Type)) -> BitOp { + match op_ty { + (Opcode::Bitrev, I32) => BitOp::RBit32, + (Opcode::Bitrev, I64) => BitOp::RBit64, + (Opcode::Clz, I32) => BitOp::Clz32, + (Opcode::Clz, I64) => BitOp::Clz64, + (Opcode::Cls, I32) => BitOp::Cls32, + (Opcode::Cls, I64) => BitOp::Cls64, + _ => unreachable!("Called with non-bit op!"), + } + } +} + +/// Instruction formats. +#[derive(Clone, Debug)] +pub enum Inst { + /// A no-op of zero size. + Nop, + + /// A no-op that is one instruction large. + Nop4, + + /// An ALU operation with two register sources and a register destination. + AluRRR { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + }, + /// An ALU operation with three register sources and a register destination. + AluRRRR { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + ra: Reg, + }, + /// An ALU operation with a register source and an immediate-12 source, and a register + /// destination. + AluRRImm12 { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + imm12: Imm12, + }, + /// An ALU operation with a register source and an immediate-logic source, and a register destination. + AluRRImmLogic { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + imml: ImmLogic, + }, + /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination. + AluRRImmShift { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + immshift: ImmShift, + }, + /// An ALU operation with two register sources, one of which can be shifted, and a register + /// destination. + AluRRRShift { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + shiftop: ShiftOpAndAmt, + }, + /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and + /// shifted, and a register destination. + AluRRRExtend { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + extendop: ExtendOp, + }, + + /// A bit op instruction with a single register source. + BitRR { + op: BitOp, + rd: Writable, + rn: Reg, + }, + + /// An unsigned (zero-extending) 8-bit load. + ULoad8 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A signed (sign-extending) 8-bit load. + SLoad8 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// An unsigned (zero-extending) 16-bit load. + ULoad16 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A signed (sign-extending) 16-bit load. + SLoad16 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// An unsigned (zero-extending) 32-bit load. + ULoad32 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A signed (sign-extending) 32-bit load. + SLoad32 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A 64-bit load. + ULoad64 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + + /// An 8-bit store. + Store8 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + /// A 16-bit store. 
+ Store16 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + /// A 32-bit store. + Store32 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + /// A 64-bit store. + Store64 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + + /// A store of a pair of registers. + StoreP64 { + rt: Reg, + rt2: Reg, + mem: PairMemArg, + }, + /// A load of a pair of registers. + LoadP64 { + rt: Writable, + rt2: Writable, + mem: PairMemArg, + }, + + /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we + /// keep them separate at the `Inst` level for better pretty-printing + /// and faster `is_move()` logic. + Mov { + rd: Writable, + rm: Reg, + }, + + /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is + /// effectively an alias for an unsigned 32-to-64-bit extension. + Mov32 { + rd: Writable, + rm: Reg, + }, + + /// A MOVZ with a 16-bit immediate. + MovZ { + rd: Writable, + imm: MoveWideConst, + }, + + /// A MOVN with a 16-bit immediate. + MovN { + rd: Writable, + imm: MoveWideConst, + }, + + /// A MOVK with a 16-bit immediate. + MovK { + rd: Writable, + imm: MoveWideConst, + }, + + /// A sign- or zero-extend operation. + Extend { + rd: Writable, + rn: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + }, + + /// A conditional-select operation. + CSel { + rd: Writable, + cond: Cond, + rn: Reg, + rm: Reg, + }, + + /// A conditional-set operation. + CSet { + rd: Writable, + cond: Cond, + }, + + /// FPU move. Note that this is distinct from a vector-register + /// move; moving just 64 bits seems to be significantly faster. + FpuMove64 { + rd: Writable, + rn: Reg, + }, + + /// 1-op FPU instruction. + FpuRR { + fpu_op: FPUOp1, + rd: Writable, + rn: Reg, + }, + + /// 2-op FPU instruction. + FpuRRR { + fpu_op: FPUOp2, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// 3-op FPU instruction. + FpuRRRR { + fpu_op: FPUOp3, + rd: Writable, + rn: Reg, + rm: Reg, + ra: Reg, + }, + + /// FPU comparison, single-precision (32 bit). + FpuCmp32 { + rn: Reg, + rm: Reg, + }, + + /// FPU comparison, double-precision (64 bit). + FpuCmp64 { + rn: Reg, + rm: Reg, + }, + + /// Floating-point loads and stores. + FpuLoad32 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + FpuStore32 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + FpuLoad64 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + FpuStore64 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + FpuLoad128 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + FpuStore128 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + + LoadFpuConst32 { + rd: Writable, + const_data: f32, + }, + + LoadFpuConst64 { + rd: Writable, + const_data: f64, + }, + + /// Conversions between FP and integer values. + FpuToInt { + op: FpuToIntOp, + rd: Writable, + rn: Reg, + }, + + IntToFpu { + op: IntToFpuOp, + rd: Writable, + rn: Reg, + }, + + // FP conditional select. + FpuCSel32 { + rd: Writable, + rn: Reg, + rm: Reg, + cond: Cond, + }, + FpuCSel64 { + rd: Writable, + rn: Reg, + rm: Reg, + cond: Cond, + }, + + // Round to integer. + FpuRound { + op: FpuRoundMode, + rd: Writable, + rn: Reg, + }, + + /// Move to a vector register from a GPR. + MovToVec64 { + rd: Writable, + rn: Reg, + }, + + /// Move to a GPR from a vector register. + MovFromVec64 { + rd: Writable, + rn: Reg, + }, + + /// A vector ALU op. + VecRRR { + alu_op: VecALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). + MovToNZCV { + rn: Reg, + }, + + /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). 
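A brief note on the `Mov`/`Mov32` variants above (illustrative, standard A64 aliasing), explaining why they can share the ORR (`AluRRR`) encoding path while remaining distinct variants for pretty-printing and a cheap `is_move()` check:

    // mov x0, x1   ==   orr x0, xzr, x1
    // mov w0, w1   ==   orr w0, wzr, w1   (writing a W register zeroes bits 63:32)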
+ MovFromNZCV { + rd: Writable, + }, + + /// Set a register to 1 if condition, else 0. + CondSet { + rd: Writable, + cond: Cond, + }, + + /// A machine call instruction. + Call { + dest: ExternalName, + uses: Set, + defs: Set>, + loc: SourceLoc, + opcode: Opcode, + }, + /// A machine indirect-call instruction. + CallInd { + rn: Reg, + uses: Set, + defs: Set>, + loc: SourceLoc, + opcode: Opcode, + }, + + // ---- branches (exactly one must appear at end of BB) ---- + /// A machine return instruction. + Ret {}, + + /// A placeholder instruction, generating no code, meaning that a function epilogue must be + /// inserted there. + EpiloguePlaceholder {}, + + /// An unconditional branch. + Jump { + dest: BranchTarget, + }, + + /// A conditional branch. + CondBr { + taken: BranchTarget, + not_taken: BranchTarget, + kind: CondBrKind, + }, + + /// Lowered conditional branch: contains the original branch kind (or the + /// inverse), but only one BranchTarget is retained. The other is + /// implicitly the next instruction, given the final basic-block layout. + CondBrLowered { + target: BranchTarget, + kind: CondBrKind, + }, + + /// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two + /// actual machine instructions). Needed when the final block layout implies + /// that neither arm of a conditional branch targets the fallthrough block. + CondBrLoweredCompound { + taken: BranchTarget, + not_taken: BranchTarget, + kind: CondBrKind, + }, + + /// An indirect branch through a register, augmented with set of all + /// possible successors. + IndirectBr { + rn: Reg, + targets: Vec, + }, + + /// A "break" instruction, used for e.g. traps and debug breakpoints. + Brk, + + /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at + /// runtime. + Udf { + trap_info: (SourceLoc, TrapCode), + }, + + /// Load the address (using a PC-relative offset) of a MemLabel, using the + /// `ADR` instruction. + Adr { + rd: Writable, + label: MemLabel, + }, + + /// Raw 32-bit word, used for inline constants and jump-table entries. + Word4 { + data: u32, + }, + + /// Raw 64-bit word, used for inline constants. + Word8 { + data: u64, + }, + + /// Jump-table sequence, as one compound instruction (see note in lower.rs + /// for rationale). + JTSequence { + targets: Vec, + targets_for_term: Vec, // needed for MachTerminator. + ridx: Reg, + rtmp1: Writable, + rtmp2: Writable, + }, + + /// Load an inline constant. + LoadConst64 { + rd: Writable, + const_data: u64, + }, + + /// Load an inline symbol reference. + LoadExtName { + rd: Writable, + name: ExternalName, + srcloc: SourceLoc, + offset: i64, + }, +} + +fn count_clear_half_words(mut value: u64) -> usize { + let mut count = 0; + for _ in 0..4 { + if value & 0xffff == 0 { + count += 1; + } + value >>= 16; + } + + count +} + +impl Inst { + /// Create a move instruction. + pub fn mov(to_reg: Writable, from_reg: Reg) -> Inst { + assert!(to_reg.to_reg().get_class() == from_reg.get_class()); + if from_reg.get_class() == RegClass::I64 { + Inst::Mov { + rd: to_reg, + rm: from_reg, + } + } else { + Inst::FpuMove64 { + rd: to_reg, + rn: from_reg, + } + } + } + + /// Create a 32-bit move instruction. + pub fn mov32(to_reg: Writable, from_reg: Reg) -> Inst { + Inst::Mov32 { + rd: to_reg, + rm: from_reg, + } + } + + /// Create an instruction that loads a constant, using one of serveral options (MOVZ, MOVN, + /// logical immediate, or constant pool). 
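A rough sketch of the sequences this strategy produces (illustrative only; `writable_xreg` comes from regs.rs, and the third case assumes `ImmLogic` accepts the value as a logical immediate):

    let rd = writable_xreg(1);
    assert_eq!(Inst::load_constant(rd, 0x1_0000).len(), 1);              // one MOVZ (LSL #16)
    assert_eq!(Inst::load_constant(rd, !1u64).len(), 1);                 // one MOVN
    assert_eq!(Inst::load_constant(rd, 0xffff_ffff).len(), 1);           // ORR-immediate with xzr
    assert_eq!(Inst::load_constant(rd, 0x1234_5678_9abc_def0).len(), 4); // MOVZ + 3x MOVK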
+ pub fn load_constant(rd: Writable, value: u64) -> SmallVec<[Inst; 4]> { + if let Some(imm) = MoveWideConst::maybe_from_u64(value) { + // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ + smallvec![Inst::MovZ { rd, imm }] + } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) { + // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN + smallvec![Inst::MovN { rd, imm }] + } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) { + // Weird logical-instruction immediate in ORI using zero register + smallvec![Inst::AluRRImmLogic { + alu_op: ALUOp::Orr64, + rd, + rn: zero_reg(), + imml, + }] + } else { + let mut insts = smallvec![]; + + // If the number of 0xffff half words is greater than the number of 0x0000 half words + // it is more efficient to use `movn` for the first instruction. + let first_is_inverted = count_clear_half_words(!value) > count_clear_half_words(value); + // Either 0xffff or 0x0000 half words can be skipped, depending on the first + // instruction used. + let ignored_halfword = if first_is_inverted { 0xffff } else { 0 }; + let mut first_mov_emitted = false; + + for i in 0..4 { + let imm16 = (value >> (16 * i)) & 0xffff; + if imm16 != ignored_halfword { + if !first_mov_emitted { + first_mov_emitted = true; + if first_is_inverted { + let imm = + MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16) + .unwrap(); + insts.push(Inst::MovN { rd, imm }); + } else { + let imm = + MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); + insts.push(Inst::MovZ { rd, imm }); + } + } else { + let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); + insts.push(Inst::MovK { rd, imm }); + } + } + } + + assert!(first_mov_emitted); + + insts + } + } + + /// Create an instruction that loads a 32-bit floating-point constant. + pub fn load_fp_constant32(rd: Writable, value: f32) -> Inst { + // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits. + Inst::LoadFpuConst32 { + rd, + const_data: value, + } + } + + /// Create an instruction that loads a 64-bit floating-point constant. + pub fn load_fp_constant64(rd: Writable, value: f64) -> Inst { + // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits. + Inst::LoadFpuConst64 { + rd, + const_data: value, + } + } +} + +//============================================================================= +// Instructions: get_regs + +fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { + match memarg { + &MemArg::Unscaled(reg, ..) | &MemArg::UnsignedOffset(reg, ..) => { + collector.add_use(reg); + } + &MemArg::RegReg(r1, r2, ..) + | &MemArg::RegScaled(r1, r2, ..) + | &MemArg::RegScaledExtended(r1, r2, ..) => { + collector.add_use(r1); + collector.add_use(r2); + } + &MemArg::Label(..) => {} + &MemArg::PreIndexed(reg, ..) | &MemArg::PostIndexed(reg, ..) => { + collector.add_mod(reg); + } + &MemArg::FPOffset(..) => { + collector.add_use(fp_reg()); + } + &MemArg::SPOffset(..) => { + collector.add_use(stack_reg()); + } + } +} + +fn pairmemarg_regs(pairmemarg: &PairMemArg, collector: &mut RegUsageCollector) { + match pairmemarg { + &PairMemArg::SignedOffset(reg, ..) => { + collector.add_use(reg); + } + &PairMemArg::PreIndexed(reg, ..) | &PairMemArg::PostIndexed(reg, ..) => { + collector.add_mod(reg); + } + } +} + +fn arm64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { + match inst { + &Inst::AluRRR { rd, rn, rm, .. 
} => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRRR { rd, rn, rm, ra, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + collector.add_use(ra); + } + &Inst::AluRRImm12 { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRImmLogic { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRImmShift { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRRShift { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRRExtend { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::BitRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::ULoad8 { rd, ref mem, .. } + | &Inst::SLoad8 { rd, ref mem, .. } + | &Inst::ULoad16 { rd, ref mem, .. } + | &Inst::SLoad16 { rd, ref mem, .. } + | &Inst::ULoad32 { rd, ref mem, .. } + | &Inst::SLoad32 { rd, ref mem, .. } + | &Inst::ULoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::Store8 { rd, ref mem, .. } + | &Inst::Store16 { rd, ref mem, .. } + | &Inst::Store32 { rd, ref mem, .. } + | &Inst::Store64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::StoreP64 { + rt, rt2, ref mem, .. + } => { + collector.add_use(rt); + collector.add_use(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::LoadP64 { + rt, rt2, ref mem, .. + } => { + collector.add_def(rt); + collector.add_def(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::Mov { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::Mov32 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => { + collector.add_def(rd); + } + &Inst::MovK { rd, .. } => { + collector.add_mod(rd); + } + &Inst::CSel { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::CSet { rd, .. } => { + collector.add_def(rd); + } + &Inst::FpuMove64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + collector.add_use(ra); + } + &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad128 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore128 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => { + collector.add_def(rd); + } + &Inst::FpuToInt { rd, rn, .. 
} => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::IntToFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuRound { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::MovToVec64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::MovFromVec64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::MovToNZCV { rn } => { + collector.add_use(rn); + } + &Inst::MovFromNZCV { rd } => { + collector.add_def(rd); + } + &Inst::CondSet { rd, .. } => { + collector.add_def(rd); + } + &Inst::Extend { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::Jump { .. } | &Inst::Ret { .. } | &Inst::EpiloguePlaceholder { .. } => {} + &Inst::Call { + ref uses, ref defs, .. + } => { + collector.add_uses(uses); + collector.add_defs(defs); + } + &Inst::CallInd { + ref uses, + ref defs, + rn, + .. + } => { + collector.add_uses(uses); + collector.add_defs(defs); + collector.add_use(rn); + } + &Inst::CondBr { ref kind, .. } + | &Inst::CondBrLowered { ref kind, .. } + | &Inst::CondBrLoweredCompound { ref kind, .. } => match kind { + CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { + collector.add_use(*rt); + } + CondBrKind::Cond(_) => {} + }, + &Inst::IndirectBr { rn, .. } => { + collector.add_use(rn); + } + &Inst::Nop | Inst::Nop4 => {} + &Inst::Brk => {} + &Inst::Udf { .. } => {} + &Inst::Adr { rd, .. } => { + collector.add_def(rd); + } + &Inst::Word4 { .. } | &Inst::Word8 { .. } => {} + &Inst::JTSequence { + ridx, rtmp1, rtmp2, .. + } => { + collector.add_use(ridx); + collector.add_def(rtmp1); + collector.add_def(rtmp2); + } + &Inst::LoadConst64 { rd, .. } | &Inst::LoadExtName { rd, .. } => { + collector.add_def(rd); + } + } +} + +//============================================================================= +// Instructions: map_regs + +fn arm64_map_regs( + inst: &mut Inst, + pre_map: &RegallocMap, + post_map: &RegallocMap, +) { + fn map(m: &RegallocMap, r: Reg) -> Reg { + if r.is_virtual() { + m.get(&r.to_virtual_reg()).cloned().unwrap().to_reg() + } else { + r + } + } + + fn map_wr(m: &RegallocMap, r: Writable) -> Writable { + Writable::from_reg(map(m, r.to_reg())) + } + + fn map_mem(u: &RegallocMap, mem: &MemArg) -> MemArg { + // N.B.: we take only the pre-map here, but this is OK because the + // only addressing modes that update registers (pre/post-increment on + // ARM64) both read and write registers, so they are "mods" rather + // than "defs", so must be the same in both the pre- and post-map. 
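For instance (a sketch; the concrete registers are arbitrary):

    // str x7, [x3, #16]    -> the base x3 is only read: a "use", so only the
    //                         pre-map applies.
    // str x7, [x3, #16]!   -> pre-indexed: x3 is read and written, so it is
    //                         recorded as a "mod" in arm64_get_regs above and
    //                         regalloc keeps its pre- and post-assignment equal.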
+ match mem { + &MemArg::Unscaled(reg, simm9) => MemArg::Unscaled(map(u, reg), simm9), + &MemArg::UnsignedOffset(reg, uimm12) => MemArg::UnsignedOffset(map(u, reg), uimm12), + &MemArg::RegReg(r1, r2) => MemArg::RegReg(map(u, r1), map(u, r2)), + &MemArg::RegScaled(r1, r2, ty) => MemArg::RegScaled(map(u, r1), map(u, r2), ty), + &MemArg::RegScaledExtended(r1, r2, ty, op) => { + MemArg::RegScaledExtended(map(u, r1), map(u, r2), ty, op) + } + &MemArg::Label(ref l) => MemArg::Label(l.clone()), + &MemArg::PreIndexed(r, simm9) => MemArg::PreIndexed(map_wr(u, r), simm9), + &MemArg::PostIndexed(r, simm9) => MemArg::PostIndexed(map_wr(u, r), simm9), + &MemArg::FPOffset(off) => MemArg::FPOffset(off), + &MemArg::SPOffset(off) => MemArg::SPOffset(off), + } + } + + fn map_pairmem(u: &RegallocMap, mem: &PairMemArg) -> PairMemArg { + match mem { + &PairMemArg::SignedOffset(reg, simm7) => PairMemArg::SignedOffset(map(u, reg), simm7), + &PairMemArg::PreIndexed(reg, simm7) => PairMemArg::PreIndexed(map_wr(u, reg), simm7), + &PairMemArg::PostIndexed(reg, simm7) => PairMemArg::PostIndexed(map_wr(u, reg), simm7), + } + } + + fn map_br(u: &RegallocMap, br: &CondBrKind) -> CondBrKind { + match br { + &CondBrKind::Zero(reg) => CondBrKind::Zero(map(u, reg)), + &CondBrKind::NotZero(reg) => CondBrKind::NotZero(map(u, reg)), + &CondBrKind::Cond(c) => CondBrKind::Cond(c), + } + } + + let u = pre_map; // For brevity below. + let d = post_map; + + let newval = match inst { + &mut Inst::AluRRR { alu_op, rd, rn, rm } => Inst::AluRRR { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::AluRRRR { + alu_op, + rd, + rn, + rm, + ra, + } => Inst::AluRRRR { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + ra: map(u, ra), + }, + &mut Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => Inst::AluRRImm12 { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + imm12: imm12.clone(), + }, + &mut Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => Inst::AluRRImmLogic { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + imml: imml.clone(), + }, + &mut Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => Inst::AluRRImmShift { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + immshift: immshift.clone(), + }, + &mut Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => Inst::AluRRRShift { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + shiftop: shiftop.clone(), + }, + &mut Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + ref extendop, + } => Inst::AluRRRExtend { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + extendop: extendop.clone(), + }, + &mut Inst::BitRR { op, rd, rn } => Inst::BitRR { + op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::ULoad8 { + rd, + ref mem, + srcloc, + } => Inst::ULoad8 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::SLoad8 { + rd, + ref mem, + srcloc, + } => Inst::SLoad8 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::ULoad16 { + rd, + ref mem, + srcloc, + } => Inst::ULoad16 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::SLoad16 { + rd, + ref mem, + srcloc, + } => Inst::SLoad16 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::ULoad32 { + rd, + ref mem, + srcloc, + } => Inst::ULoad32 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::SLoad32 { + rd, + ref mem, + srcloc, + } => Inst::SLoad32 { + rd: map_wr(d, rd), + mem: map_mem(u, 
mem), + srcloc, + }, + &mut Inst::ULoad64 { + rd, + ref mem, + srcloc, + } => Inst::ULoad64 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store8 { + rd, + ref mem, + srcloc, + } => Inst::Store8 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store16 { + rd, + ref mem, + srcloc, + } => Inst::Store16 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store32 { + rd, + ref mem, + srcloc, + } => Inst::Store32 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store64 { + rd, + ref mem, + srcloc, + } => Inst::Store64 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::StoreP64 { rt, rt2, ref mem } => Inst::StoreP64 { + rt: map(u, rt), + rt2: map(u, rt2), + mem: map_pairmem(u, mem), + }, + &mut Inst::LoadP64 { rt, rt2, ref mem } => Inst::LoadP64 { + rt: map_wr(d, rt), + rt2: map_wr(d, rt2), + mem: map_pairmem(u, mem), + }, + &mut Inst::Mov { rd, rm } => Inst::Mov { + rd: map_wr(d, rd), + rm: map(u, rm), + }, + &mut Inst::Mov32 { rd, rm } => Inst::Mov32 { + rd: map_wr(d, rd), + rm: map(u, rm), + }, + &mut Inst::MovZ { rd, ref imm } => Inst::MovZ { + rd: map_wr(d, rd), + imm: imm.clone(), + }, + &mut Inst::MovN { rd, ref imm } => Inst::MovN { + rd: map_wr(d, rd), + imm: imm.clone(), + }, + &mut Inst::MovK { rd, ref imm } => Inst::MovK { + rd: map_wr(d, rd), + imm: imm.clone(), + }, + &mut Inst::CSel { rd, rn, rm, cond } => Inst::CSel { + cond, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::CSet { rd, cond } => Inst::CSet { + cond, + rd: map_wr(d, rd), + }, + &mut Inst::FpuMove64 { rd, rn } => Inst::FpuMove64 { + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::FpuRR { fpu_op, rd, rn } => Inst::FpuRR { + fpu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::FpuRRR { fpu_op, rd, rn, rm } => Inst::FpuRRR { + fpu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => Inst::FpuRRRR { + fpu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + ra: map(u, ra), + }, + &mut Inst::FpuCmp32 { rn, rm } => Inst::FpuCmp32 { + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuCmp64 { rn, rm } => Inst::FpuCmp64 { + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuLoad32 { + rd, + ref mem, + srcloc, + } => Inst::FpuLoad32 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuLoad64 { + rd, + ref mem, + srcloc, + } => Inst::FpuLoad64 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuLoad128 { + rd, + ref mem, + srcloc, + } => Inst::FpuLoad64 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuStore32 { + rd, + ref mem, + srcloc, + } => Inst::FpuStore32 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuStore64 { + rd, + ref mem, + srcloc, + } => Inst::FpuStore64 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuStore128 { + rd, + ref mem, + srcloc, + } => Inst::FpuStore64 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::LoadFpuConst32 { rd, const_data } => Inst::LoadFpuConst32 { + rd: map_wr(d, rd), + const_data, + }, + &mut Inst::LoadFpuConst64 { rd, const_data } => Inst::LoadFpuConst64 { + rd: map_wr(d, rd), + const_data, + }, + &mut Inst::FpuToInt { op, rd, rn } => Inst::FpuToInt { + op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::IntToFpu { op, rd, rn } => Inst::IntToFpu { + op, + rd: map_wr(d, rd), + 
rn: map(u, rn), + }, + &mut Inst::FpuCSel32 { rd, rn, rm, cond } => Inst::FpuCSel32 { + cond, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuCSel64 { rd, rn, rm, cond } => Inst::FpuCSel64 { + cond, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuRound { op, rd, rn } => Inst::FpuRound { + op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::MovToVec64 { rd, rn } => Inst::MovToVec64 { + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::MovFromVec64 { rd, rn } => Inst::MovFromVec64 { + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::VecRRR { rd, rn, rm, alu_op } => Inst::VecRRR { + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + alu_op, + }, + &mut Inst::MovToNZCV { rn } => Inst::MovToNZCV { rn: map(u, rn) }, + &mut Inst::MovFromNZCV { rd } => Inst::MovFromNZCV { rd: map_wr(d, rd) }, + &mut Inst::CondSet { rd, cond } => Inst::CondSet { + rd: map_wr(d, rd), + cond, + }, + &mut Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => Inst::Extend { + rd: map_wr(d, rd), + rn: map(u, rn), + signed, + from_bits, + to_bits, + }, + &mut Inst::Jump { dest } => Inst::Jump { dest }, + &mut Inst::Call { + ref uses, + ref defs, + ref dest, + loc, + opcode, + } => { + let uses = uses.map(|r| map(u, *r)); + let defs = defs.map(|r| map_wr(d, *r)); + let dest = dest.clone(); + Inst::Call { + dest, + uses, + defs, + loc, + opcode, + } + } + &mut Inst::Ret {} => Inst::Ret {}, + &mut Inst::EpiloguePlaceholder {} => Inst::EpiloguePlaceholder {}, + &mut Inst::CallInd { + ref uses, + ref defs, + rn, + loc, + opcode, + } => { + let uses = uses.map(|r| map(u, *r)); + let defs = defs.map(|r| map_wr(d, *r)); + Inst::CallInd { + uses, + defs, + rn: map(u, rn), + loc, + opcode, + } + } + &mut Inst::CondBr { + taken, + not_taken, + kind, + } => Inst::CondBr { + taken, + not_taken, + kind: map_br(u, &kind), + }, + &mut Inst::CondBrLowered { target, kind } => Inst::CondBrLowered { + target, + kind: map_br(u, &kind), + }, + &mut Inst::CondBrLoweredCompound { + taken, + not_taken, + kind, + } => Inst::CondBrLoweredCompound { + taken, + not_taken, + kind: map_br(u, &kind), + }, + &mut Inst::IndirectBr { rn, ref targets } => Inst::IndirectBr { + rn: map(u, rn), + targets: targets.clone(), + }, + &mut Inst::Nop => Inst::Nop, + &mut Inst::Nop4 => Inst::Nop4, + &mut Inst::Brk => Inst::Brk, + &mut Inst::Udf { trap_info } => Inst::Udf { trap_info }, + &mut Inst::Adr { rd, ref label } => Inst::Adr { + rd: map_wr(d, rd), + label: label.clone(), + }, + &mut Inst::Word4 { data } => Inst::Word4 { data }, + &mut Inst::Word8 { data } => Inst::Word8 { data }, + &mut Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + ref targets, + ref targets_for_term, + } => Inst::JTSequence { + targets: targets.clone(), + targets_for_term: targets_for_term.clone(), + ridx: map(u, ridx), + rtmp1: map_wr(d, rtmp1), + rtmp2: map_wr(d, rtmp2), + }, + &mut Inst::LoadConst64 { rd, const_data } => Inst::LoadConst64 { + rd: map_wr(d, rd), + const_data, + }, + &mut Inst::LoadExtName { + rd, + ref name, + offset, + srcloc, + } => Inst::LoadExtName { + rd: map_wr(d, rd), + name: name.clone(), + offset, + srcloc, + }, + }; + *inst = newval; +} + +//============================================================================= +// Instructions: misc functions and external interface + +impl MachInst for Inst { + fn get_regs(&self, collector: &mut RegUsageCollector) { + arm64_get_regs(self, collector) + } + + fn map_regs( + &mut self, + pre_map: &RegallocMap, + post_map: 
&RegallocMap, + ) { + arm64_map_regs(self, pre_map, post_map); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + &Inst::Mov { rd, rm } => Some((rd, rm)), + &Inst::FpuMove64 { rd, rn } => Some((rd, rn)), + _ => None, + } + } + + fn is_epilogue_placeholder(&self) -> bool { + if let Inst::EpiloguePlaceholder { .. } = self { + true + } else { + false + } + } + + fn is_term<'a>(&'a self) -> MachTerminator<'a> { + match self { + &Inst::Ret {} | &Inst::EpiloguePlaceholder {} => MachTerminator::Ret, + &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()), + &Inst::CondBr { + taken, not_taken, .. + } => MachTerminator::Cond( + taken.as_block_index().unwrap(), + not_taken.as_block_index().unwrap(), + ), + &Inst::CondBrLowered { .. } => { + // When this is used prior to branch finalization for branches + // within an open-coded sequence, i.e. with ResolvedOffsets, + // do not consider it a terminator. From the point of view of CFG analysis, + // it is part of a black-box single-in single-out region, hence is not + // denoted a terminator. + MachTerminator::None + } + &Inst::CondBrLoweredCompound { .. } => { + panic!("is_term() called after lowering branches"); + } + &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]), + &Inst::JTSequence { + ref targets_for_term, + .. + } => MachTerminator::Indirect(&targets_for_term[..]), + _ => MachTerminator::None, + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + assert!(ty.bits() <= 64); // no vector support yet! + Inst::mov(to_reg, from_reg) + } + + fn gen_zero_len_nop() -> Inst { + Inst::Nop + } + + fn gen_nop(preferred_size: usize) -> Inst { + // We can't give a NOP (or any insn) < 4 bytes. + assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option { + None + } + + fn rc_for_type(ty: Type) -> RegClass { + match ty { + I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => RegClass::I64, + F32 | F64 => RegClass::V128, + I128 | B128 => RegClass::V128, + IFLAGS | FFLAGS => RegClass::I64, + _ => panic!("Unexpected SSA-value type: {}", ty), + } + } + + fn gen_jump(blockindex: BlockIndex) -> Inst { + Inst::Jump { + dest: BranchTarget::Block(blockindex), + } + } + + fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) { + match self { + &mut Inst::Jump { ref mut dest } => { + dest.map(block_target_map); + } + &mut Inst::CondBr { + ref mut taken, + ref mut not_taken, + .. + } => { + taken.map(block_target_map); + not_taken.map(block_target_map); + } + &mut Inst::CondBrLowered { .. } => { + // See note in `is_term()`: this is used in open-coded sequences + // within blocks and should be left alone. + } + &mut Inst::CondBrLoweredCompound { .. } => { + panic!("with_block_rewrites called after branch lowering!"); + } + _ => {} + } + } + + fn with_fallthrough_block(&mut self, fallthrough: Option) { + match self { + &mut Inst::CondBr { + taken, + not_taken, + kind, + } => { + if taken.as_block_index() == fallthrough + && not_taken.as_block_index() == fallthrough + { + *self = Inst::Nop; + } else if taken.as_block_index() == fallthrough { + *self = Inst::CondBrLowered { + target: not_taken, + kind: kind.invert(), + }; + } else if not_taken.as_block_index() == fallthrough { + *self = Inst::CondBrLowered { + target: taken, + kind, + }; + } else { + // We need a compound sequence (condbr / uncond-br). 
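To make the fallthrough cases concrete (illustrative; `taken`/`not_taken` as in the match arm above):

    // With the final layout placing `fallthrough` immediately after this block:
    //   both targets == fallthrough -> Nop
    //   not_taken == fallthrough    -> CondBrLowered { target: taken, kind }              => b.<cond> taken
    //   taken == fallthrough        -> CondBrLowered { target: not_taken, kind.invert() } => b.<inv> not_taken
    //   neither                     -> the compound form built here                        => b.<cond> taken ; b not_taken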
+ *self = Inst::CondBrLoweredCompound { + taken, + not_taken, + kind, + }; + } + } + &mut Inst::Jump { dest } => { + if dest.as_block_index() == fallthrough { + *self = Inst::Nop; + } + } + _ => {} + } + } + + fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) { + match self { + &mut Inst::CondBrLowered { ref mut target, .. } => { + target.lower(targets, my_offset); + } + &mut Inst::CondBrLoweredCompound { + ref mut taken, + ref mut not_taken, + .. + } => { + taken.lower(targets, my_offset); + not_taken.lower(targets, my_offset + 4); + } + &mut Inst::Jump { ref mut dest } => { + dest.lower(targets, my_offset); + } + &mut Inst::JTSequence { + targets: ref mut t, .. + } => { + for target in t { + // offset+20: jumptable is 20 bytes into compound sequence. + target.lower(targets, my_offset + 20); + } + } + _ => {} + } + } + + fn reg_universe() -> RealRegUniverse { + create_reg_universe() + } +} + +//============================================================================= +// Pretty-printing of instructions. + +fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(0, mem); + let mut mem_str = mem_insts + .into_iter() + .map(|inst| inst.show_rru(mb_rru)) + .collect::>() + .join(" ; "); + if !mem_str.is_empty() { + mem_str += " ; "; + } + + (mem_str, mem) +} + +impl ShowWithRRU for Inst { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn op_is32(alu_op: ALUOp) -> (&'static str, bool) { + match alu_op { + ALUOp::Add32 => ("add", true), + ALUOp::Add64 => ("add", false), + ALUOp::Sub32 => ("sub", true), + ALUOp::Sub64 => ("sub", false), + ALUOp::Orr32 => ("orr", true), + ALUOp::Orr64 => ("orr", false), + ALUOp::And32 => ("and", true), + ALUOp::And64 => ("and", false), + ALUOp::Eor32 => ("eor", true), + ALUOp::Eor64 => ("eor", false), + ALUOp::AddS32 => ("adds", true), + ALUOp::AddS64 => ("adds", false), + ALUOp::SubS32 => ("subs", true), + ALUOp::SubS64 => ("subs", false), + ALUOp::MAdd32 => ("madd", true), + ALUOp::MAdd64 => ("madd", false), + ALUOp::MSub32 => ("msub", true), + ALUOp::MSub64 => ("msub", false), + ALUOp::SMulH => ("smulh", false), + ALUOp::UMulH => ("umulh", false), + ALUOp::SDiv64 => ("sdiv", false), + ALUOp::UDiv64 => ("udiv", false), + ALUOp::AndNot32 => ("bic", true), + ALUOp::AndNot64 => ("bic", false), + ALUOp::OrrNot32 => ("orn", true), + ALUOp::OrrNot64 => ("orn", false), + ALUOp::EorNot32 => ("eon", true), + ALUOp::EorNot64 => ("eon", false), + ALUOp::RotR32 => ("ror", true), + ALUOp::RotR64 => ("ror", false), + ALUOp::Lsr32 => ("lsr", true), + ALUOp::Lsr64 => ("lsr", false), + ALUOp::Asr32 => ("asr", true), + ALUOp::Asr64 => ("asr", false), + ALUOp::Lsl32 => ("lsl", true), + ALUOp::Lsl64 => ("lsl", false), + } + } + + match self { + &Inst::Nop => "nop-zero-len".to_string(), + &Inst::Nop4 => "nop".to_string(), + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::AluRRRR { + alu_op, + rd, + rn, + rm, + ra, + } => { + let (op, is32) = op_is32(alu_op); + let four_args = alu_op != ALUOp::SMulH && alu_op != ALUOp::UMulH; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + let ra = show_ireg_sized(ra, mb_rru, is32); + if 
four_args { + format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) + } else { + // smulh and umulh have Ra "hard-wired" to the zero register + // and the canonical assembly form has only three regs. + format!("{} {}, {}, {}", op, rd, rn, rm) + } + } + &Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + + if imm12.bits == 0 && alu_op == ALUOp::Add64 { + // special-case MOV (used for moving into SP). + format!("mov {}, {}", rd, rn) + } else { + let imm12 = imm12.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imm12) + } + } + &Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let imml = imml.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imml) + } + &Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let immshift = immshift.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, immshift) + } + &Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + let shiftop = shiftop.show_rru(mb_rru); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop) + } + &Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + ref extendop, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + let extendop = extendop.show_rru(mb_rru); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop) + } + &Inst::BitRR { op, rd, rn } => { + let is32 = op.is_32_bit(); + let op = op.op_str(); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + format!("{} {}, {}", op, rd, rn) + } + &Inst::ULoad8 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::SLoad8 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::ULoad16 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::SLoad16 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::ULoad32 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::SLoad32 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::ULoad64 { + rd, + ref mem, + srcloc: _srcloc, + .. + } => { + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + + let is_unscaled = match &mem { + &MemArg::Unscaled(..) => true, + _ => false, + }; + let (op, is32) = match (self, is_unscaled) { + (&Inst::ULoad8 { .. }, false) => ("ldrb", true), + (&Inst::ULoad8 { .. }, true) => ("ldurb", true), + (&Inst::SLoad8 { .. }, false) => ("ldrsb", false), + (&Inst::SLoad8 { .. }, true) => ("ldursb", false), + (&Inst::ULoad16 { .. }, false) => ("ldrh", true), + (&Inst::ULoad16 { .. }, true) => ("ldurh", true), + (&Inst::SLoad16 { .. }, false) => ("ldrsh", false), + (&Inst::SLoad16 { .. }, true) => ("ldursh", false), + (&Inst::ULoad32 { .. }, false) => ("ldr", true), + (&Inst::ULoad32 { .. }, true) => ("ldur", true), + (&Inst::SLoad32 { .. }, false) => ("ldrsw", false), + (&Inst::SLoad32 { .. }, true) => ("ldursw", false), + (&Inst::ULoad64 { .. 
}, false) => ("ldr", false), + (&Inst::ULoad64 { .. }, true) => ("ldur", false), + _ => unreachable!(), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, rd, mem) + } + &Inst::Store8 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::Store16 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::Store32 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::Store64 { + rd, + ref mem, + srcloc: _srcloc, + .. + } => { + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + + let is_unscaled = match &mem { + &MemArg::Unscaled(..) => true, + _ => false, + }; + let (op, is32) = match (self, is_unscaled) { + (&Inst::Store8 { .. }, false) => ("strb", true), + (&Inst::Store8 { .. }, true) => ("sturb", true), + (&Inst::Store16 { .. }, false) => ("strh", true), + (&Inst::Store16 { .. }, true) => ("sturh", true), + (&Inst::Store32 { .. }, false) => ("str", true), + (&Inst::Store32 { .. }, true) => ("stur", true), + (&Inst::Store64 { .. }, false) => ("str", false), + (&Inst::Store64 { .. }, true) => ("stur", false), + _ => unreachable!(), + }; + let rd = show_ireg_sized(rd, mb_rru, is32); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, rd, mem) + } + &Inst::StoreP64 { rt, rt2, ref mem } => { + let rt = rt.show_rru(mb_rru); + let rt2 = rt2.show_rru(mb_rru); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("stp {}, {}, {}", rt, rt2, mem) + } + &Inst::LoadP64 { rt, rt2, ref mem } => { + let rt = rt.to_reg().show_rru(mb_rru); + let rt2 = rt2.to_reg().show_rru(mb_rru); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("ldp {}, {}, {}", rt, rt2, mem) + } + &Inst::Mov { rd, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("mov {}, {}", rd, rm) + } + &Inst::Mov32 { rd, rm } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rm = show_ireg_sized(rm, mb_rru, /* is32 = */ true); + format!("mov {}, {}", rd, rm) + } + &Inst::MovZ { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let imm = imm.show_rru(mb_rru); + format!("movz {}, {}", rd, imm) + } + &Inst::MovN { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let imm = imm.show_rru(mb_rru); + format!("movn {}, {}", rd, imm) + } + &Inst::MovK { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let imm = imm.show_rru(mb_rru); + format!("movk {}, {}", rd, imm) + } + &Inst::CSel { rd, rn, rm, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("csel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::CSet { rd, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("cset {}, {}", rd, cond) + } + &Inst::FpuMove64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}.8b, {}.8b", rd, rn) + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let (op, is32src, is32dst) = match fpu_op { + FPUOp1::Abs32 => ("fabs", true, true), + FPUOp1::Abs64 => ("fabs", false, false), + FPUOp1::Neg32 => ("fneg", true, true), + FPUOp1::Neg64 => ("fneg", false, false), + FPUOp1::Sqrt32 => ("fsqrt", true, true), + FPUOp1::Sqrt64 => ("fsqrt", false, false), + FPUOp1::Cvt32To64 => ("fcvt", true, false), + FPUOp1::Cvt64To32 => ("fcvt", false, true), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32dst); + let rn = show_freg_sized(rn, mb_rru, 
is32src); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let (op, is32) = match fpu_op { + FPUOp2::Add32 => ("fadd", true), + FPUOp2::Add64 => ("fadd", false), + FPUOp2::Sub32 => ("fsub", true), + FPUOp2::Sub64 => ("fsub", false), + FPUOp2::Mul32 => ("fmul", true), + FPUOp2::Mul64 => ("fmul", false), + FPUOp2::Div32 => ("fdiv", true), + FPUOp2::Div64 => ("fdiv", false), + FPUOp2::Max32 => ("fmax", true), + FPUOp2::Max64 => ("fmax", false), + FPUOp2::Min32 => ("fmin", true), + FPUOp2::Min64 => ("fmin", false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_freg_sized(rn, mb_rru, is32); + let rm = show_freg_sized(rm, mb_rru, is32); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { + let (op, is32) = match fpu_op { + FPUOp3::MAdd32 => ("fmadd", true), + FPUOp3::MAdd64 => ("fmadd", false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_freg_sized(rn, mb_rru, is32); + let rm = show_freg_sized(rm, mb_rru, is32); + let ra = show_freg_sized(ra, mb_rru, is32); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) + } + &Inst::FpuCmp32 { rn, rm } => { + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ true); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ true); + format!("fcmp {}, {}", rn, rm) + } + &Inst::FpuCmp64 { rn, rm } => { + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ false); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ false); + format!("fcmp {}, {}", rn, rm) + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 4); + format!("ldr {}, {}", rd, mem) + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("ldr {}, {}", rd, mem) + } + &Inst::FpuLoad128 { rd, ref mem, .. } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rd = "q".to_string() + &rd[1..]; + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("ldr {}, {}", rd, mem) + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd, mb_rru, /* is32 = */ true); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 4); + format!("str {}, {}", rd, mem) + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd, mb_rru, /* is32 = */ false); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("str {}, {}", rd, mem) + } + &Inst::FpuStore128 { rd, ref mem, .. 
} => { + let rd = rd.show_rru(mb_rru); + let rd = "q".to_string() + &rd[1..]; + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("str {}, {}", rd, mem) + } + &Inst::LoadFpuConst32 { rd, const_data } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + format!("ldr {}, pc+8 ; b 8 ; data.f32 {}", rd, const_data) + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, const_data) + } + &Inst::FpuToInt { op, rd, rn } => { + let (op, is32src, is32dest) = match op { + FpuToIntOp::F32ToI32 => ("fcvtzs", true, true), + FpuToIntOp::F32ToU32 => ("fcvtzu", true, true), + FpuToIntOp::F32ToI64 => ("fcvtzs", true, false), + FpuToIntOp::F32ToU64 => ("fcvtzu", true, false), + FpuToIntOp::F64ToI32 => ("fcvtzs", false, true), + FpuToIntOp::F64ToU32 => ("fcvtzu", false, true), + FpuToIntOp::F64ToI64 => ("fcvtzs", false, false), + FpuToIntOp::F64ToU64 => ("fcvtzu", false, false), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32dest); + let rn = show_freg_sized(rn, mb_rru, is32src); + format!("{} {}, {}", op, rd, rn) + } + &Inst::IntToFpu { op, rd, rn } => { + let (op, is32src, is32dest) = match op { + IntToFpuOp::I32ToF32 => ("scvtf", true, true), + IntToFpuOp::U32ToF32 => ("ucvtf", true, true), + IntToFpuOp::I64ToF32 => ("scvtf", false, true), + IntToFpuOp::U64ToF32 => ("ucvtf", false, true), + IntToFpuOp::I32ToF64 => ("scvtf", true, false), + IntToFpuOp::U32ToF64 => ("ucvtf", true, false), + IntToFpuOp::I64ToF64 => ("scvtf", false, false), + IntToFpuOp::U64ToF64 => ("ucvtf", false, false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32dest); + let rn = show_ireg_sized(rn, mb_rru, is32src); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuCSel32 { rd, rn, rm, cond } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ true); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ true); + let cond = cond.show_rru(mb_rru); + format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::FpuCSel64 { rd, rn, rm, cond } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ false); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ false); + let cond = cond.show_rru(mb_rru); + format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::FpuRound { op, rd, rn } => { + let (inst, is32) = match op { + FpuRoundMode::Minus32 => ("frintm", true), + FpuRoundMode::Minus64 => ("frintm", false), + FpuRoundMode::Plus32 => ("frintp", true), + FpuRoundMode::Plus64 => ("frintp", false), + FpuRoundMode::Zero32 => ("frintz", true), + FpuRoundMode::Zero64 => ("frintz", false), + FpuRoundMode::Nearest32 => ("frintn", true), + FpuRoundMode::Nearest64 => ("frintn", false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_freg_sized(rn, mb_rru, is32); + format!("{} {}, {}", inst, rd, rn) + } + &Inst::MovToVec64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}.d[0], {}", rd, rn) + } + &Inst::MovFromVec64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}, {}.d[0]", rd, rn) + } + &Inst::VecRRR { rd, rn, rm, alu_op } => { + let op = match alu_op { + VecALUOp::SQAddScalar => "sqadd", + VecALUOp::UQAddScalar => "uqadd", + VecALUOp::SQSubScalar => "sqsub", + VecALUOp::UQSubScalar => 
"uqsub", + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru); + let rn = show_vreg_scalar(rn, mb_rru); + let rm = show_vreg_scalar(rm, mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::MovToNZCV { rn } => { + let rn = rn.show_rru(mb_rru); + format!("msr nzcv, {}", rn) + } + &Inst::MovFromNZCV { rd } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("mrs {}, nzcv", rd) + } + &Inst::CondSet { rd, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("cset {}, {}", rd, cond) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits >= 8 => { + // Is the destination a 32-bit register? Corresponds to whether + // extend-to width is <= 32 bits, *unless* we have an unsigned + // 32-to-64-bit extension, which is implemented with a "mov" to a + // 32-bit (W-reg) dest, because this zeroes the top 32 bits. + let dest_is32 = if !signed && from_bits == 32 && to_bits == 64 { + true + } else { + to_bits <= 32 + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_is32); + let rn = show_ireg_sized(rn, mb_rru, from_bits <= 32); + let op = match (signed, from_bits, to_bits) { + (false, 8, 32) => "uxtb", + (true, 8, 32) => "sxtb", + (false, 16, 32) => "uxth", + (true, 16, 32) => "sxth", + (false, 8, 64) => "uxtb", + (true, 8, 64) => "sxtb", + (false, 16, 64) => "uxth", + (true, 16, 64) => "sxth", + (false, 32, 64) => "mov", // special case (see above). + (true, 32, 64) => "sxtw", + _ => panic!("Unsupported Extend case: {:?}", self), + }; + format!("{} {}, {}", op, rd, rn) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && signed => { + let dest_is32 = to_bits <= 32; + let zr = if dest_is32 { "wzr" } else { "xzr" }; + let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_is32); + let rn = show_ireg_sized(rn, mb_rru, /* is32 = */ true); + format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + .. + } if from_bits == 1 && !signed => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rn = show_ireg_sized(rn, mb_rru, /* is32 = */ true); + format!("and {}, {}, #1", rd, rn) + } + &Inst::Extend { .. } => { + panic!("Unsupported Extend case"); + } + &Inst::Call { dest: _, .. } => format!("bl 0"), + &Inst::CallInd { rn, .. 
} => { + let rn = rn.show_rru(mb_rru); + format!("blr {}", rn) + } + &Inst::Ret {} => "ret".to_string(), + &Inst::EpiloguePlaceholder {} => "epilogue placeholder".to_string(), + &Inst::Jump { ref dest } => { + let dest = dest.show_rru(mb_rru); + format!("b {}", dest) + } + &Inst::CondBr { + ref taken, + ref not_taken, + ref kind, + } => { + let taken = taken.show_rru(mb_rru); + let not_taken = not_taken.show_rru(mb_rru); + match kind { + &CondBrKind::Zero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbz {}, {} ; b {}", reg, taken, not_taken) + } + &CondBrKind::NotZero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbnz {}, {} ; b {}", reg, taken, not_taken) + } + &CondBrKind::Cond(c) => { + let c = c.show_rru(mb_rru); + format!("b.{} {} ; b {}", c, taken, not_taken) + } + } + } + &Inst::CondBrLowered { + ref target, + ref kind, + } => { + let target = target.show_rru(mb_rru); + match &kind { + &CondBrKind::Zero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbz {}, {}", reg, target) + } + &CondBrKind::NotZero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbnz {}, {}", reg, target) + } + &CondBrKind::Cond(c) => { + let c = c.show_rru(mb_rru); + format!("b.{} {}", c, target) + } + } + } + &Inst::CondBrLoweredCompound { + ref taken, + ref not_taken, + ref kind, + } => { + let first = Inst::CondBrLowered { + target: taken.clone(), + kind: kind.clone(), + }; + let second = Inst::Jump { + dest: not_taken.clone(), + }; + first.show_rru(mb_rru) + " ; " + &second.show_rru(mb_rru) + } + &Inst::IndirectBr { rn, .. } => { + let rn = rn.show_rru(mb_rru); + format!("br {}", rn) + } + &Inst::Brk => "brk #0".to_string(), + &Inst::Udf { .. } => "udf".to_string(), + &Inst::Adr { rd, ref label } => { + let rd = rd.show_rru(mb_rru); + let label = label.show_rru(mb_rru); + format!("adr {}, {}", rd, label) + } + &Inst::Word4 { data } => format!("data.i32 {}", data), + &Inst::Word8 { data } => format!("data.i64 {}", data), + &Inst::JTSequence { + ref targets, + ridx, + rtmp1, + rtmp2, + .. + } => { + let ridx = ridx.show_rru(mb_rru); + let rtmp1 = rtmp1.show_rru(mb_rru); + let rtmp2 = rtmp2.show_rru(mb_rru); + format!( + concat!( + "adr {}, pc+16 ; ", + "ldrsw {}, [{}, {}, LSL 2] ; ", + "add {}, {}, {} ; ", + "br {} ; ", + "jt_entries {:?}" + ), + rtmp1, rtmp2, rtmp1, ridx, rtmp1, rtmp1, rtmp2, rtmp1, targets + ) + } + &Inst::LoadConst64 { rd, const_data } => { + let rd = rd.show_rru(mb_rru); + format!("ldr {}, 8 ; b 12 ; data {:?}", rd, const_data) + } + &Inst::LoadExtName { + rd, + ref name, + offset, + srcloc: _srcloc, + } => { + let rd = rd.show_rru(mb_rru); + format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) + } + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/regs.rs b/cranelift/codegen/src/isa/arm64/inst/regs.rs new file mode 100644 index 0000000000..31a915410a --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/regs.rs @@ -0,0 +1,273 @@ +//! ARM64 ISA definitions: registers. 
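+//!
+//! A sketch of the register-index layout used below (as implied by
+//! `XREG_INDICES`, `ZERO_REG_INDEX`, `SP_REG_INDEX` and
+//! `create_reg_universe()`): indices 0..=31 name v0..v31, 32..=58 name the
+//! allocatable x-registers, and 59..=64 name the non-allocatable x15 (spill
+//! temp), x18 (reserved), fp (x29), lr (x30), xzr and sp.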
+
+#![allow(dead_code)]
+
+use crate::machinst::*;
+
+use regalloc::{
+    RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable,
+    NUM_REG_CLASSES,
+};
+
+use std::string::{String, ToString};
+
+//=============================================================================
+// Registers, the Universe thereof, and printing
+
+#[rustfmt::skip]
+const XREG_INDICES: [u8; 31] = [
+    // X0 - X7
+    32, 33, 34, 35, 36, 37, 38, 39,
+    // X8 - X14
+    40, 41, 42, 43, 44, 45, 46,
+    // X15
+    59,
+    // X16, X17
+    47, 48,
+    // X18
+    60,
+    // X19 - X28
+    49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+    // X29
+    61,
+    // X30
+    62,
+];
+
+const ZERO_REG_INDEX: u8 = 63;
+
+const SP_REG_INDEX: u8 = 64;
+
+/// Get a reference to an X-register (integer register).
+pub fn xreg(num: u8) -> Reg {
+    assert!(num < 31);
+    Reg::new_real(
+        RegClass::I64,
+        /* enc = */ num,
+        /* index = */ XREG_INDICES[num as usize],
+    )
+}
+
+/// Get a writable reference to an X-register.
+pub fn writable_xreg(num: u8) -> Writable<Reg> {
+    Writable::from_reg(xreg(num))
+}
+
+/// Get a reference to a V-register (vector/FP register).
+pub fn vreg(num: u8) -> Reg {
+    assert!(num < 32);
+    Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+}
+
+/// Get a writable reference to a V-register.
+pub fn writable_vreg(num: u8) -> Writable<Reg> {
+    Writable::from_reg(vreg(num))
+}
+
+/// Get a reference to the zero-register.
+pub fn zero_reg() -> Reg {
+    // This should be the same as what xreg(31) returns, except that
+    // we use the special index into the register index space.
+    Reg::new_real(
+        RegClass::I64,
+        /* enc = */ 31,
+        /* index = */ ZERO_REG_INDEX,
+    )
+}
+
+/// Get a writable reference to the zero-register (this discards a result).
+pub fn writable_zero_reg() -> Writable<Reg> {
+    Writable::from_reg(zero_reg())
+}
+
+/// Get a reference to the stack-pointer register.
+pub fn stack_reg() -> Reg {
+    // XSP (stack) and XZR (zero) are logically different registers which have
+    // the same hardware encoding, and whose meaning, in real arm64
+    // instructions, is context-dependent. For convenience of
+    // universe-construction and for correct printing, we make them be two
+    // different real registers.
+    Reg::new_real(
+        RegClass::I64,
+        /* enc = */ 31,
+        /* index = */ SP_REG_INDEX,
+    )
+}
+
+/// Get a writable reference to the stack-pointer register.
+pub fn writable_stack_reg() -> Writable<Reg> {
+    Writable::from_reg(stack_reg())
+}
+
+/// Get a reference to the link register (x30).
+pub fn link_reg() -> Reg {
+    xreg(30)
+}
+
+/// Get a writable reference to the link register.
+pub fn writable_link_reg() -> Writable<Reg> {
+    Writable::from_reg(link_reg())
+}
+
+/// Get a reference to the frame pointer (x29).
+pub fn fp_reg() -> Reg {
+    xreg(29)
+}
+
+/// Get a writable reference to the frame pointer.
+pub fn writable_fp_reg() -> Writable<Reg> {
+    Writable::from_reg(fp_reg())
+}
+
+/// Get a reference to the "spill temp" register. This register is used to
+/// compute the address of a spill slot when a direct offset addressing mode from
+/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
+/// and reserve it for this purpose for simplicity; otherwise we need a
+/// multi-stage analysis where we first determine how many spill slots we have,
+/// then perhaps remove the reg from the pool and recompute regalloc.
+pub fn spilltmp_reg() -> Reg {
+    xreg(15)
+}
+
+/// Get a writable reference to the spilltmp reg.
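+/// (This is `spilltmp_reg()` wrapped in `Writable` so it can be named as an
+/// instruction destination; like the other `writable_*` helpers above, it does
+/// not change which register is used.)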
+pub fn writable_spilltmp_reg() -> Writable<Reg> {
+    Writable::from_reg(spilltmp_reg())
+}
+
+/// Create the register universe for ARM64.
+pub fn create_reg_universe() -> RealRegUniverse {
+    let mut regs = vec![];
+    let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+    // Numbering Scheme: we put V-regs first, then X-regs. The X-regs
+    // exclude several registers: x18 (globally reserved for platform-specific
+    // purposes), x29 (frame pointer), x30 (link register), x31 (stack pointer
+    // or zero register, depending on context).
+
+    let v_reg_base = 0u8; // in contiguous real-register index space
+    let v_reg_count = 32;
+    for i in 0u8..v_reg_count {
+        let reg = Reg::new_real(
+            RegClass::V128,
+            /* enc = */ i,
+            /* index = */ v_reg_base + i,
+        )
+        .to_real_reg();
+        let name = format!("v{}", i);
+        regs.push((reg, name));
+    }
+    let v_reg_last = v_reg_base + v_reg_count - 1;
+
+    // Add the X registers. N.B.: the order here must match the order implied
+    // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
+
+    let x_reg_base = 32u8; // in contiguous real-register index space
+    let mut x_reg_count = 0;
+    for i in 0u8..32u8 {
+        // See above for excluded registers.
+        if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 {
+            continue;
+        }
+        let reg = Reg::new_real(
+            RegClass::I64,
+            /* enc = */ i,
+            /* index = */ x_reg_base + x_reg_count,
+        )
+        .to_real_reg();
+        let name = format!("x{}", i);
+        regs.push((reg, name));
+        x_reg_count += 1;
+    }
+    let x_reg_last = x_reg_base + x_reg_count - 1;
+
+    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+        first: x_reg_base as usize,
+        last: x_reg_last as usize,
+        suggested_scratch: Some(XREG_INDICES[13] as usize),
+    });
+    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
+        first: v_reg_base as usize,
+        last: v_reg_last as usize,
+        suggested_scratch: Some(/* V31: */ 31),
+    });
+
+    // Other regs, not available to the allocator.
+    let allocable = regs.len();
+    regs.push((xreg(15).to_real_reg(), "x15".to_string()));
+    regs.push((xreg(18).to_real_reg(), "x18".to_string()));
+    regs.push((fp_reg().to_real_reg(), "fp".to_string()));
+    regs.push((link_reg().to_real_reg(), "lr".to_string()));
+    regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
+    regs.push((stack_reg().to_real_reg(), "sp".to_string()));
+    // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
+    // to 65, which is potentially inconvenient from a compiler performance
+    // standpoint. We could possibly drop back to 64 by "losing" a vector
+    // register in future.
+
+    // Assert sanity: the indices in the register structs must match their
+    // actual indices in the array.
+    for (i, reg) in regs.iter().enumerate() {
+        assert_eq!(i, reg.0.get_index());
+    }
+
+    RealRegUniverse {
+        regs,
+        allocable,
+        allocable_by_class,
+    }
+}
+
+/// If |ireg| denotes an I64-classed reg, make a best-effort attempt to show
+/// its name at the 32-bit size.
+pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String {
+    let mut s = reg.show_rru(mb_rru);
+    if reg.get_class() != RegClass::I64 || !is32 {
+        // We can't do any better.
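+        // (That is, a non-integer register, or an integer register used in a
+        // 64-bit role, keeps whatever name `show_rru` produced.)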
+ return s; + } + + if reg.is_real() { + // Change (eg) "x42" into "w42" as appropriate + if reg.get_class() == RegClass::I64 && is32 && s.starts_with("x") { + s = "w".to_string() + &s[1..]; + } + } else { + // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role + if reg.get_class() == RegClass::I64 && is32 { + s = s + &"w"; + } + } + s +} + +/// Show a vector register when its use as a 32-bit or 64-bit float is known. +pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String { + let s = reg.show_rru(mb_rru); + if reg.get_class() != RegClass::V128 { + return s; + } + let prefix = if is32 { "s" } else { "d" }; + prefix.to_string() + &s[1..] +} + +/// Show a vector register used in a scalar context. +pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String { + let mut s = reg.show_rru(mb_rru); + if reg.get_class() != RegClass::V128 { + // We can't do any better. + return s; + } + + if reg.is_real() { + // Change (eg) "v0" into "d0". + if reg.get_class() == RegClass::V128 && s.starts_with("v") { + s = "d".to_string() + &s[1..]; + } + } else { + // Add a "d" suffix to RegClass::V128 vregs. + if reg.get_class() == RegClass::V128 { + s = s + &"d"; + } + } + s +} diff --git a/cranelift/codegen/src/isa/arm64/mod.rs b/cranelift/codegen/src/isa/arm64/mod.rs index 2bd6dce476..b6a28a5dbd 100644 --- a/cranelift/codegen/src/isa/arm64/mod.rs +++ b/cranelift/codegen/src/isa/arm64/mod.rs @@ -1 +1 @@ -// Empty. +mod inst;