Merge pull request #1494 from cfallin/arm64-merge

Add new `MachInst` backend and ARM64 support.
Chris Fallin
2020-04-16 10:02:02 -07:00
committed by GitHub
63 changed files with 16668 additions and 322 deletions


@@ -0,0 +1,885 @@
//! Implementation of the standard AArch64 ABI.
use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::StackSlot;
use crate::isa;
use crate::isa::aarch64::inst::*;
use crate::machinst::*;
use crate::settings;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use log::debug;
/// A location for an argument or return value.
#[derive(Clone, Copy, Debug)]
enum ABIArg {
/// In a real register.
Reg(RealReg, ir::Type),
/// Arguments only: on stack, at given offset from SP at entry.
Stack(i64, ir::Type),
}
/// AArch64 ABI information shared between body (callee) and caller.
struct ABISig {
args: Vec<ABIArg>,
rets: Vec<ABIArg>,
stack_arg_space: i64,
call_conv: isa::CallConv,
}
// SpiderMonkey-specific ABI convention.
/// This is SpiderMonkey's `WasmTableCallSigReg`.
static BALDRDASH_SIG_REG: u8 = 10;
/// This is SpiderMonkey's `WasmTlsReg`.
static BALDRDASH_TLS_REG: u8 = 23;
// These two lists represent the registers the JIT may *not* use at any point in generated code.
//
// So these are callee-preserved from the JIT's point of view, and every
// register not marked true below has to be treated as caller-saved by definition.
//
// Keep these lists in sync with the NonAllocatableMask set in SpiderMonkey's
// Architecture-arm64.cpp.
// Indexed by physical register number.
#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_GPR: &[bool] = &[
/* 0 = */ false, false, false, false, false, false, false, false,
/* 8 = */ false, false, false, false, false, false, false, false,
/* 16 = */ true /* x16 / ip0 */, true /* x17 / ip1 */, true /* x18 / TLS */, false,
/* 20 = */ false, false, false, false,
/* 24 = */ false, false, false, false,
// x28, the pseudo stack pointer, should also be in this list; however, the
// wasm stubs currently clobber it freely.
/* 28 = */ false, false, true /* x30 = FP */, true /* x31 = SP */
];
#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_FPU: &[bool] = &[
/* 0 = */ false, false, false, false, false, false, false, false,
/* 8 = */ false, false, false, false, false, false, false, false,
/* 16 = */ false, false, false, false, false, false, false, false,
/* 24 = */ false, false, false, false, false, false, false, true /* v31 / d31 */
];
/// Try to fill a Baldrdash register, returning it if it was found.
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
if call_conv.extends_baldrdash() {
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
xreg(BALDRDASH_TLS_REG).to_real_reg(),
ir::types::I64,
))
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
xreg(BALDRDASH_SIG_REG).to_real_reg(),
ir::types::I64,
))
}
_ => None,
}
} else {
None
}
}
/// Process a list of parameters or return values and allocate them to X-regs,
/// V-regs, and stack slots.
///
/// Returns the list of argument locations, and the stack-space used (rounded up
/// to a 16-byte-aligned boundary).
fn compute_arg_locs(call_conv: isa::CallConv, params: &[ir::AbiParam]) -> (Vec<ABIArg>, i64) {
// See the AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), section 5.4.
let mut next_xreg = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
let mut ret = vec![];
for param in params {
// Validate "purpose".
match &param.purpose {
&ir::ArgumentPurpose::VMContext
| &ir::ArgumentPurpose::Normal
| &ir::ArgumentPurpose::SignatureId => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
),
}
if in_int_reg(param.value_type) {
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
ret.push(param);
} else if next_xreg < 8 {
ret.push(ABIArg::Reg(xreg(next_xreg).to_real_reg(), param.value_type));
next_xreg += 1;
} else {
ret.push(ABIArg::Stack(next_stack as i64, param.value_type));
next_stack += 8;
}
} else if in_vec_reg(param.value_type) {
if next_vreg < 8 {
ret.push(ABIArg::Reg(vreg(next_vreg).to_real_reg(), param.value_type));
next_vreg += 1;
} else {
let size: u64 = match param.value_type {
F32 | F64 => 8,
_ => panic!("Unsupported vector-reg argument type"),
};
// Align.
assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(next_stack as i64, param.value_type));
next_stack += size;
}
}
}
next_stack = (next_stack + 15) & !15;
(ret, next_stack as i64)
}
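// A small sketch test (not part of the original change) illustrating the
// allocation rules above: the ninth integer argument overflows the eight
// argument x-regs onto the stack, and the stack-arg space is rounded up to a
// 16-byte boundary.
#[cfg(test)]
mod arg_loc_test {
use super::*;
#[test]
fn ninth_int_arg_goes_to_stack() {
let params = vec![ir::AbiParam::new(types::I64); 9];
let (locs, space) = compute_arg_locs(isa::CallConv::SystemV, &params);
assert_eq!(locs.len(), 9);
match locs[8] {
ABIArg::Stack(off, ty) => {
assert_eq!(off, 0);
assert_eq!(ty, types::I64);
}
_ => panic!("expected a stack argument"),
}
assert_eq!(space, 16);
}
}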
impl ABISig {
fn from_func_sig(sig: &ir::Signature) -> ABISig {
// Compute args and retvals from signature.
// TODO: pass in arg-mode or ret-mode. (Does not matter
// for the types of arguments/return values that we support.)
let (args, stack_arg_space) = compute_arg_locs(sig.call_conv, &sig.params);
let (rets, _) = compute_arg_locs(sig.call_conv, &sig.returns);
// Verify that there are no return values on the stack.
assert!(rets.iter().all(|a| match a {
&ABIArg::Stack(..) => false,
_ => true,
}));
ABISig {
args,
rets,
stack_arg_space,
call_conv: sig.call_conv,
}
}
}
/// AArch64 ABI object for a function body.
pub struct AArch64ABIBody {
/// Signature: arg and retval regs.
sig: ABISig,
/// Offsets to each stackslot.
stackslots: Vec<u32>,
/// Total stack size of all stackslots.
stackslots_size: u32,
/// Clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
/// Total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Total frame size.
frame_size: Option<u32>,
/// Calling convention this function expects.
call_conv: isa::CallConv,
}
fn in_int_reg(ty: ir::Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 | types::I64 => true,
types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
_ => false,
}
}
fn in_vec_reg(ty: ir::Type) -> bool {
match ty {
types::F32 | types::F64 => true,
_ => false,
}
}
impl AArch64ABIBody {
/// Create a new body ABI instance.
pub fn new(f: &ir::Function) -> Self {
debug!("AArch64 ABI: func signature {:?}", f.signature);
let sig = ABISig::from_func_sig(&f.signature);
let call_conv = f.signature.call_conv;
// Only these calling conventions are supported.
assert!(
call_conv == isa::CallConv::SystemV
|| call_conv == isa::CallConv::Fast
|| call_conv == isa::CallConv::Cold
|| call_conv.extends_baldrdash(),
"Unsupported calling convention: {:?}",
call_conv
);
// Compute stackslot locations and total stackslot size.
let mut stack_offset: u32 = 0;
let mut stackslots = vec![];
for (stackslot, data) in f.stack_slots.iter() {
let off = stack_offset;
stack_offset += data.size;
stack_offset = (stack_offset + 7) & !7;
assert_eq!(stackslot.as_u32() as usize, stackslots.len());
stackslots.push(off);
}
Self {
sig,
stackslots,
stackslots_size: stack_offset,
clobbered: Set::empty(),
spillslots: None,
frame_size: None,
call_conv,
}
}
}
fn load_stack(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst {
let mem = MemArg::FPOffset(fp_offset);
match ty {
types::B1
| types::B8
| types::I8
| types::B16
| types::I16
| types::B32
| types::I32
| types::B64
| types::I64 => Inst::ULoad64 {
rd: into_reg,
mem,
srcloc: None,
},
types::F32 => Inst::FpuLoad32 {
rd: into_reg,
mem,
srcloc: None,
},
types::F64 => Inst::FpuLoad64 {
rd: into_reg,
mem,
srcloc: None,
},
_ => unimplemented!("load_stack({})", ty),
}
}
fn store_stack(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
let mem = MemArg::FPOffset(fp_offset);
match ty {
types::B1
| types::B8
| types::I8
| types::B16
| types::I16
| types::B32
| types::I32
| types::B64
| types::I64 => Inst::Store64 {
rd: from_reg,
mem,
srcloc: None,
},
types::F32 => Inst::FpuStore32 {
rd: from_reg,
mem,
srcloc: None,
},
types::F64 => Inst::FpuStore64 {
rd: from_reg,
mem,
srcloc: None,
},
_ => unimplemented!("store_stack({})", ty),
}
}
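// Note (illustrative): store_stack(-16, rn, types::I32) emits Inst::Store64 at
// [FP - 16]; all integer and boolean types are spilled and reloaded as full
// 64-bit accesses in this scheme, which is safe because every slot is padded
// out to 8 bytes.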
fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() {
match r.get_class() {
RegClass::I64 => {
let enc = r.get_hw_encoding();
if BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
return true;
}
// Otherwise, fall through to preserve native ABI registers.
}
RegClass::V128 => {
let enc = r.get_hw_encoding();
if BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
return true;
}
// Otherwise, fall through to preserve native ABI registers.
}
_ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
};
}
match r.get_class() {
RegClass::I64 => {
// x19 - x28 inclusive are callee-saves.
r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28
}
RegClass::V128 => {
// v8 - v15 inclusive are callee-saves.
r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
}
_ => panic!("Unexpected RegClass"),
}
}
fn get_callee_saves(
call_conv: isa::CallConv,
regs: Vec<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
let mut int_saves = vec![];
let mut vec_saves = vec![];
for reg in regs.into_iter() {
if is_callee_save(call_conv, reg.to_reg()) {
match reg.to_reg().get_class() {
RegClass::I64 => int_saves.push(reg),
RegClass::V128 => vec_saves.push(reg),
_ => panic!("Unexpected RegClass"),
}
}
}
(int_saves, vec_saves)
}
fn is_caller_save(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() {
match r.get_class() {
RegClass::I64 => {
let enc = r.get_hw_encoding();
if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
return true;
}
// Otherwise, fall through to preserve native's ABI caller-saved.
}
RegClass::V128 => {
let enc = r.get_hw_encoding();
if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
return true;
}
// Otherwise, fall through to preserve native's ABI caller-saved.
}
_ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
};
}
match r.get_class() {
RegClass::I64 => {
// x0 - x17 inclusive are caller-saves.
r.get_hw_encoding() <= 17
}
RegClass::V128 => {
// v0 - v7 inclusive and v16 - v31 inclusive are caller-saves.
r.get_hw_encoding() <= 7 || (r.get_hw_encoding() >= 16 && r.get_hw_encoding() <= 31)
}
_ => panic!("Unexpected RegClass"),
}
}
fn get_caller_saves_set(call_conv: isa::CallConv) -> Set<Writable<Reg>> {
let mut set = Set::empty();
for i in 0..29 {
let x = writable_xreg(i);
if is_caller_save(call_conv, x.to_reg().to_real_reg()) {
set.insert(x);
}
}
for i in 0..32 {
let v = writable_vreg(i);
if is_caller_save(call_conv, v.to_reg().to_real_reg()) {
set.insert(v);
}
}
set
}
impl ABIBody for AArch64ABIBody {
type I = Inst;
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &arg in &self.sig.args {
if let ABIArg::Reg(r, _) = arg {
set.insert(r);
}
}
set
}
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &ret in &self.sig.rets {
if let ABIArg::Reg(r, _) = ret {
set.insert(r);
}
}
set
}
fn num_args(&self) -> usize {
self.sig.args.len()
}
fn num_retvals(&self) -> usize {
self.sig.rets.len()
}
fn num_stackslots(&self) -> usize {
self.stackslots.len()
}
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
match &self.sig.args[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty),
&ABIArg::Stack(off, ty) => load_stack(off + 16, into_reg, ty),
}
}
fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> Inst {
match &self.sig.rets[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(Writable::from_reg(r.to_reg()), from_reg, ty),
&ABIArg::Stack(off, ty) => store_stack(off + 16, from_reg, ty),
}
}
fn gen_ret(&self) -> Inst {
Inst::Ret {}
}
fn gen_epilogue_placeholder(&self) -> Inst {
Inst::EpiloguePlaceholder {}
}
fn set_num_spillslots(&mut self, slots: usize) {
self.spillslots = Some(slots);
}
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
self.clobbered = clobbered;
}
fn load_stackslot(
&self,
slot: StackSlot,
offset: u32,
ty: Type,
into_reg: Writable<Reg>,
) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
load_stack(fp_off, into_reg, ty)
}
fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
store_stack(fp_off, from_reg, ty)
}
// Load from a spillslot.
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
// Note that when spills/fills are generated, we don't yet know how many
// spillslots there will be, so we allocate *downward* from the beginning
// of the stackslot area. Hence: FP - stackslot_size - 8*spillslot -
// sizeof(ty).
let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(into_reg.to_reg().get_class(), ty) * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
load_stack(fp_off, into_reg, ty)
}
// Store to a spillslot.
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst {
let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
store_stack(fp_off, from_reg, ty)
}
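// Worked example (illustrative): with stackslots_size == 32, an I64 spill to
// SpillSlot 2 computes ty_size = 1 * 8 = 8, so fp_off = -32 - 16 - 8 = -56 and
// the slot is addressed as [FP - 56].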
fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<Inst> {
let mut insts = vec![];
if !self.call_conv.extends_baldrdash() {
// stp fp (x29), lr (x30), [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: fp_reg(),
rt2: link_reg(),
mem: PairMemArg::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
});
// mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
// the usual encoding (`ORR`) does not work with SP.
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_fp_reg(),
rn: stack_reg(),
imm12: Imm12 {
bits: 0,
shift12: false,
},
});
}
let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap() as u32;
if self.call_conv.extends_baldrdash() {
debug_assert!(
!flags.enable_probestack(),
"baldrdash does not expect cranelift to emit stack probes"
);
total_stacksize += flags.baldrdash_prologue_words() as u32 * 8;
}
let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.
if !self.call_conv.extends_baldrdash() && total_stacksize > 0 {
// sub sp, sp, #total_stacksize
if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
let sub_inst = Inst::AluRRImm12 {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12,
};
insts.push(sub_inst);
} else {
let tmp = writable_spilltmp_reg();
let const_inst = Inst::LoadConst64 {
rd: tmp,
const_data: total_stacksize as u64,
};
let sub_inst = Inst::AluRRRExtend {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
};
insts.push(const_inst);
insts.push(sub_inst);
}
}
// Save clobbered registers.
let (clobbered_int, clobbered_vec) =
get_callee_saves(self.call_conv, self.clobbered.to_vec());
for reg_pair in clobbered_int.chunks(2) {
let (r1, r2) = if reg_pair.len() == 2 {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
} else {
(reg_pair[0].to_reg().to_reg(), zero_reg())
};
debug_assert!(r1.get_class() == RegClass::I64);
debug_assert!(r2.get_class() == RegClass::I64);
// stp r1, r2, [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: r1,
rt2: r2,
mem: PairMemArg::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
});
}
let vec_save_bytes = clobbered_vec.len() * 16;
if vec_save_bytes != 0 {
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
});
}
for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuStore128 {
rd: reg.to_reg().to_reg(),
mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
srcloc: None,
});
}
self.frame_size = Some(total_stacksize);
insts
}
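// For reference, the emitted prologue for a non-baldrdash function with a
// small frame and two clobbered integer callee-saves looks roughly like this
// (illustrative sketch):
//
//   stp  fp, lr, [sp, #-16]!
//   mov  fp, sp                  // encoded as ADD fp, sp, #0
//   sub  sp, sp, #total_stacksize
//   stp  x19, x20, [sp, #-16]!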
fn gen_epilogue(&self, _flags: &settings::Flags) -> Vec<Inst> {
let mut insts = vec![];
// Restore clobbered registers.
let (clobbered_int, clobbered_vec) =
get_callee_saves(self.call_conv, self.clobbered.to_vec());
for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuLoad128 {
rd: Writable::from_reg(reg.to_reg().to_reg()),
mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
srcloc: None,
});
}
let vec_save_bytes = clobbered_vec.len() * 16;
if vec_save_bytes != 0 {
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
});
}
for reg_pair in clobbered_int.chunks(2).rev() {
let (r1, r2) = if reg_pair.len() == 2 {
(
reg_pair[0].map(|r| r.to_reg()),
reg_pair[1].map(|r| r.to_reg()),
)
} else {
(reg_pair[0].map(|r| r.to_reg()), writable_zero_reg())
};
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
// ldp r1, r2, [sp], #16
insts.push(Inst::LoadP64 {
rt: r1,
rt2: r2,
mem: PairMemArg::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
),
});
}
if !self.call_conv.extends_baldrdash() {
// The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
// MOV to SP is an alias of ADD.
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_stack_reg(),
rn: fp_reg(),
imm12: Imm12 {
bits: 0,
shift12: false,
},
});
insts.push(Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
mem: PairMemArg::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
),
});
insts.push(Inst::Ret {});
}
debug!("Epilogue: {:?}", insts);
insts
}
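// The matching epilogue for the prologue sketched above (illustrative):
//
//   ldp  x19, x20, [sp], #16
//   mov  sp, fp                  // encoded as ADD sp, fp, #0
//   ldp  fp, lr, [sp], #16
//   ret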
fn frame_size(&self) -> u32 {
self.frame_size
.expect("frame size not computed before prologue generation")
}
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
(RegClass::V128, _) => 2,
_ => panic!("Unexpected register class!"),
}
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst {
self.store_spillslot(to_slot, ty, from_reg.to_reg())
}
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Inst {
self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
}
}
enum CallDest {
ExtName(ir::ExternalName),
Reg(Reg),
}
/// AArch64 ABI object for a function call.
pub struct AArch64ABICall {
sig: ABISig,
uses: Set<Reg>,
defs: Set<Writable<Reg>>,
dest: CallDest,
loc: ir::SourceLoc,
opcode: ir::Opcode,
}
fn abisig_to_uses_and_defs(sig: &ABISig) -> (Set<Reg>, Set<Writable<Reg>>) {
// Compute uses: all arg regs.
let mut uses = Set::empty();
for arg in &sig.args {
match arg {
&ABIArg::Reg(reg, _) => uses.insert(reg.to_reg()),
_ => {}
}
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = get_caller_saves_set(sig.call_conv);
for ret in &sig.rets {
match ret {
&ABIArg::Reg(reg, _) => defs.insert(Writable::from_reg(reg.to_reg())),
_ => {}
}
}
(uses, defs)
}
impl AArch64ABICall {
/// Create a callsite ABI object for a call directly to the specified function.
pub fn from_func(
sig: &ir::Signature,
extname: &ir::ExternalName,
loc: ir::SourceLoc,
) -> AArch64ABICall {
let sig = ABISig::from_func_sig(sig);
let (uses, defs) = abisig_to_uses_and_defs(&sig);
AArch64ABICall {
sig,
uses,
defs,
dest: CallDest::ExtName(extname.clone()),
loc,
opcode: ir::Opcode::Call,
}
}
/// Create a callsite ABI object for a call to a function pointer with the
/// given signature.
pub fn from_ptr(
sig: &ir::Signature,
ptr: Reg,
loc: ir::SourceLoc,
opcode: ir::Opcode,
) -> AArch64ABICall {
let sig = ABISig::from_func_sig(sig);
let (uses, defs) = abisig_to_uses_and_defs(&sig);
AArch64ABICall {
sig,
uses,
defs,
dest: CallDest::Reg(ptr),
loc,
opcode,
}
}
}
fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> {
if amt > 0 {
let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
if let Some(imm12) = Imm12::maybe_from_u64(amt) {
vec![Inst::AluRRImm12 {
alu_op,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12,
}]
} else {
let const_load = Inst::LoadConst64 {
rd: writable_spilltmp_reg(),
const_data: amt,
};
let adj = Inst::AluRRRExtend {
alu_op,
rd: writable_stack_reg(),
rn: stack_reg(),
rm: spilltmp_reg(),
extendop: ExtendOp::UXTX,
};
vec![const_load, adj]
}
} else {
vec![]
}
}
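// For example, adjust_stack(32, true) emits a single `sub sp, sp, #32`, while
// an amount such as 0x12345 does not fit the imm12 form (it has nonzero bits
// in both halves), so it is first loaded into the spill temporary and then
// subtracted with a register-register ALU op.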
impl ABICall for AArch64ABICall {
type I = Inst;
fn num_args(&self) -> usize {
self.sig.args.len()
}
fn gen_stack_pre_adjust(&self) -> Vec<Inst> {
adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ true)
}
fn gen_stack_post_adjust(&self) -> Vec<Inst> {
adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
}
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Inst {
match &self.sig.args[idx] {
&ABIArg::Reg(reg, ty) => Inst::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty),
&ABIArg::Stack(off, _) => Inst::Store64 {
rd: from_reg,
mem: MemArg::SPOffset(off),
srcloc: None,
},
}
}
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
match &self.sig.rets[idx] {
&ABIArg::Reg(reg, ty) => Inst::gen_move(into_reg, reg.to_reg(), ty),
_ => unimplemented!(),
}
}
fn gen_call(&self) -> Vec<Inst> {
let (uses, defs) = (self.uses.clone(), self.defs.clone());
match &self.dest {
&CallDest::ExtName(ref name) => vec![Inst::Call {
dest: name.clone(),
uses,
defs,
loc: self.loc,
opcode: self.opcode,
}],
&CallDest::Reg(reg) => vec![Inst::CallInd {
rn: reg,
uses,
defs,
loc: self.loc,
opcode: self.opcode,
}],
}
}
}
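// Expected callsite lowering order (a sketch inferred from the method names;
// the driver lives in the lowering code elsewhere in this PR):
// gen_stack_pre_adjust, then gen_copy_reg_to_arg for each argument, gen_call,
// gen_copy_retval_to_reg for each return value, and finally
// gen_stack_post_adjust.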


@@ -0,0 +1,528 @@
//! AArch64 ISA definitions: instruction arguments.
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::binemit::CodeOffset;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use regalloc::{RealRegUniverse, Reg, Writable};
use core::convert::{Into, TryFrom};
use std::string::String;
/// A shift operator for a register or immediate.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ShiftOp {
LSL = 0b00,
LSR = 0b01,
ASR = 0b10,
ROR = 0b11,
}
impl ShiftOp {
/// Get the encoding of this shift op.
pub fn bits(self) -> u8 {
self as u8
}
}
/// A shift operator amount.
#[derive(Clone, Copy, Debug)]
pub struct ShiftOpShiftImm(u8);
impl ShiftOpShiftImm {
/// Maximum shift for shifted-register operands.
pub const MAX_SHIFT: u64 = 63;
/// Create a new shiftop shift amount, if possible.
pub fn maybe_from_shift(shift: u64) -> Option<ShiftOpShiftImm> {
if shift <= Self::MAX_SHIFT {
Some(ShiftOpShiftImm(shift as u8))
} else {
None
}
}
/// Return the shift amount.
pub fn value(self) -> u8 {
self.0
}
}
/// A shift operator with an amount, guaranteed to be within range.
#[derive(Clone, Debug)]
pub struct ShiftOpAndAmt {
op: ShiftOp,
shift: ShiftOpShiftImm,
}
impl ShiftOpAndAmt {
pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
ShiftOpAndAmt { op, shift }
}
/// Get the shift op.
pub fn op(&self) -> ShiftOp {
self.op
}
/// Get the shift amount.
pub fn amt(&self) -> ShiftOpShiftImm {
self.shift
}
}
/// An extend operator for a register.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ExtendOp {
UXTB = 0b000,
UXTH = 0b001,
UXTW = 0b010,
UXTX = 0b011,
SXTB = 0b100,
SXTH = 0b101,
SXTW = 0b110,
SXTX = 0b111,
}
impl ExtendOp {
/// Encoding of this op.
pub fn bits(self) -> u8 {
self as u8
}
}
//=============================================================================
// Instruction sub-components (memory addresses): definitions
/// A reference to some memory address.
#[derive(Clone, Debug)]
pub enum MemLabel {
/// An address in the code, a constant pool or jumptable, with relative
/// offset from this instruction. This form must be used at emission time;
/// see `mem_finalize()` for how other forms are lowered to this one.
PCRel(i32),
}
/// A memory argument to load/store, encapsulating the possible addressing modes.
#[derive(Clone, Debug)]
pub enum MemArg {
Label(MemLabel),
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
PostIndexed(Writable<Reg>, SImm9),
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
PreIndexed(Writable<Reg>, SImm9),
// N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
// what the ISA calls the "register offset" addressing mode. We split out
// several options here for more ergonomic codegen.
/// Register plus register offset.
RegReg(Reg, Reg),
/// Register plus register offset, scaled by type's size.
RegScaled(Reg, Reg, Type),
/// Register plus register offset, scaled by type's size, with index sign- or zero-extended
/// first.
RegScaledExtended(Reg, Reg, Type, ExtendOp),
/// Unscaled signed 9-bit immediate offset from reg.
Unscaled(Reg, SImm9),
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
UnsignedOffset(Reg, UImm12Scaled),
/// Offset from the stack pointer. Lowered into a real amode at emission.
SPOffset(i64),
/// Offset from the frame pointer. Lowered into a real amode at emission.
FPOffset(i64),
}
impl MemArg {
/// Memory reference using an address in a register.
pub fn reg(reg: Reg) -> MemArg {
// Use UnsignedOffset rather than Unscaled so this lowers to ldr rather than ldur.
// PostIndexed / PreIndexed are also avoided, since they update the base register.
MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
}
/// Memory reference using an address in a register and an offset, if possible.
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
Some(MemArg::Unscaled(reg, simm9))
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
Some(MemArg::UnsignedOffset(reg, uimm12s))
} else {
None
}
}
/// Memory reference using the sum of two registers as an address.
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
MemArg::RegReg(reg1, reg2)
}
/// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg {
MemArg::RegScaled(reg1, reg2, ty)
}
/// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or
/// zero-extended as per `op`.
pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg {
MemArg::RegScaledExtended(reg1, reg2, ty, op)
}
/// Memory reference to a label: a global function or value, or data in the constant pool.
pub fn label(label: MemLabel) -> MemArg {
MemArg::Label(label)
}
}
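// Usage sketch (illustrative): `MemArg::reg_maybe_offset(base, 16, I64)` takes
// the `Unscaled` (SImm9) path, since that form is tried first; an offset of
// 32768 fails both SImm9 (-256 ..= 255) and UImm12Scaled (at most 4095 * 8 =
// 32760 for I64) and returns `None`, so the caller must materialize the
// address some other way.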
/// A memory argument to a load/store-pair.
#[derive(Clone, Debug)]
pub enum PairMemArg {
SignedOffset(Reg, SImm7Scaled),
PreIndexed(Writable<Reg>, SImm7Scaled),
PostIndexed(Writable<Reg>, SImm7Scaled),
}
//=============================================================================
// Instruction sub-components (conditions, branches and branch targets):
// definitions
/// Condition for conditional branches.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Cond {
Eq = 0,
Ne = 1,
Hs = 2,
Lo = 3,
Mi = 4,
Pl = 5,
Vs = 6,
Vc = 7,
Hi = 8,
Ls = 9,
Ge = 10,
Lt = 11,
Gt = 12,
Le = 13,
Al = 14,
Nv = 15,
}
impl Cond {
/// Return the inverted condition.
pub fn invert(self) -> Cond {
match self {
Cond::Eq => Cond::Ne,
Cond::Ne => Cond::Eq,
Cond::Hs => Cond::Lo,
Cond::Lo => Cond::Hs,
Cond::Mi => Cond::Pl,
Cond::Pl => Cond::Mi,
Cond::Vs => Cond::Vc,
Cond::Vc => Cond::Vs,
Cond::Hi => Cond::Ls,
Cond::Ls => Cond::Hi,
Cond::Ge => Cond::Lt,
Cond::Lt => Cond::Ge,
Cond::Gt => Cond::Le,
Cond::Le => Cond::Gt,
Cond::Al => Cond::Nv,
Cond::Nv => Cond::Al,
}
}
/// Return the machine encoding of this condition.
pub fn bits(self) -> u32 {
self as u32
}
}
/// The kind of conditional branch: the common-case-optimized "reg-is-zero" /
/// "reg-is-nonzero" variants, or the generic one that tests the machine
/// condition codes.
#[derive(Clone, Copy, Debug)]
pub enum CondBrKind {
/// Condition: given register is zero.
Zero(Reg),
/// Condition: given register is nonzero.
NotZero(Reg),
/// Condition: the given condition-code test is true.
Cond(Cond),
}
impl CondBrKind {
/// Return the inverted branch condition.
pub fn invert(self) -> CondBrKind {
match self {
CondBrKind::Zero(reg) => CondBrKind::NotZero(reg),
CondBrKind::NotZero(reg) => CondBrKind::Zero(reg),
CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()),
}
}
}
/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
pub enum BranchTarget {
/// An unresolved reference to a BlockIndex, as passed into
/// `lower_branch_group()`.
Block(BlockIndex),
/// A resolved reference to another instruction, after
/// `Inst::with_block_offsets()`.
ResolvedOffset(isize),
}
impl BranchTarget {
/// Lower the branch target given offsets of each block.
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
match self {
&mut BranchTarget::Block(bix) => {
let bix = usize::try_from(bix).unwrap();
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
*self = BranchTarget::ResolvedOffset(branch_offset);
}
&mut BranchTarget::ResolvedOffset(..) => {}
}
}
/// Get the block index.
pub fn as_block_index(&self) -> Option<BlockIndex> {
match self {
&BranchTarget::Block(bix) => Some(bix),
_ => None,
}
}
/// Get the offset as 4-byte words. Returns `0` if not
/// yet resolved (in that case, we're only computing
/// size and the offset doesn't matter).
pub fn as_offset_words(&self) -> isize {
match self {
&BranchTarget::ResolvedOffset(off) => off >> 2,
_ => 0,
}
}
/// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
pub fn as_off26(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 25)) && (off >= -(1 << 25)) {
Some((off as u32) & ((1 << 26) - 1))
} else {
None
}
}
/// Get the offset as a 19-bit offset, or `None` if overflow.
pub fn as_off19(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 18)) && (off >= -(1 << 18)) {
Some((off as u32) & ((1 << 19) - 1))
} else {
None
}
}
/// Map the block index given a transform map.
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[usize::try_from(*bix).unwrap()];
*bix = n;
}
&mut BranchTarget::ResolvedOffset(_) => {}
}
}
}
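// Range note (derived from the checks above): as_off26() accepts word offsets
// in [-2^25, 2^25), i.e. roughly +/-128 MiB of byte distance for unconditional
// branches, while as_off19() covers about +/-1 MiB for conditional branches.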
impl ShowWithRRU for ShiftOpAndAmt {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("{:?} {}", self.op(), self.amt().value())
}
}
impl ShowWithRRU for ExtendOp {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("{:?}", self)
}
}
impl ShowWithRRU for MemLabel {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&MemLabel::PCRel(off) => format!("pc+{}", off),
}
}
}
fn shift_for_type(ty: Type) -> usize {
match ty.bytes() {
1 => 0,
2 => 1,
4 => 2,
8 => 3,
16 => 4,
_ => panic!("unknown type: {}", ty),
}
}
impl ShowWithRRU for MemArg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&MemArg::Unscaled(reg, simm9) => {
if simm9.value != 0 {
format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru))
} else {
format!("[{}]", reg.show_rru(mb_rru))
}
}
&MemArg::UnsignedOffset(reg, uimm12) => {
if uimm12.value != 0 {
format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru))
} else {
format!("[{}]", reg.show_rru(mb_rru))
}
}
&MemArg::RegReg(r1, r2) => {
format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),)
}
&MemArg::RegScaled(r1, r2, ty) => {
let shift = shift_for_type(ty);
format!(
"[{}, {}, LSL #{}]",
r1.show_rru(mb_rru),
r2.show_rru(mb_rru),
shift,
)
}
&MemArg::RegScaledExtended(r1, r2, ty, op) => {
let shift = shift_for_type(ty);
let size = match op {
ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
_ => InstSize::Size64,
};
let op = op.show_rru(mb_rru);
format!(
"[{}, {}, {} #{}]",
r1.show_rru(mb_rru),
show_ireg_sized(r2, mb_rru, size),
op,
shift
)
}
&MemArg::Label(ref label) => label.show_rru(mb_rru),
&MemArg::PreIndexed(r, simm9) => format!(
"[{}, {}]!",
r.to_reg().show_rru(mb_rru),
simm9.show_rru(mb_rru)
),
&MemArg::PostIndexed(r, simm9) => format!(
"[{}], {}",
r.to_reg().show_rru(mb_rru),
simm9.show_rru(mb_rru)
),
// Eliminated by `mem_finalize()`.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
panic!("Unexpected stack-offset mem-arg mode!")
}
}
}
}
impl ShowWithRRU for PairMemArg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&PairMemArg::SignedOffset(reg, simm7) => {
if simm7.value != 0 {
format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru))
} else {
format!("[{}]", reg.show_rru(mb_rru))
}
}
&PairMemArg::PreIndexed(reg, simm7) => format!(
"[{}, {}]!",
reg.to_reg().show_rru(mb_rru),
simm7.show_rru(mb_rru)
),
&PairMemArg::PostIndexed(reg, simm7) => format!(
"[{}], {}",
reg.to_reg().show_rru(mb_rru),
simm7.show_rru(mb_rru)
),
}
}
}
impl ShowWithRRU for Cond {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let mut s = format!("{:?}", self);
s.make_ascii_lowercase();
s
}
}
impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&BranchTarget::Block(block) => format!("block{}", block),
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
}
}
}
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
/// 64-bit variants of many instructions (and integer registers).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
Size32,
Size64,
}
impl InstSize {
/// 32-bit case?
pub fn is32(self) -> bool {
self == InstSize::Size32
}
/// 64-bit case?
pub fn is64(self) -> bool {
self == InstSize::Size64
}
/// Convert from an `is32` boolean flag to an `InstSize`.
pub fn from_is32(is32: bool) -> InstSize {
if is32 {
InstSize::Size32
} else {
InstSize::Size64
}
}
/// Convert from a needed width to the smallest size that fits.
pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
let bits: usize = bits.into();
assert!(bits <= 64);
if bits <= 32 {
InstSize::Size32
} else {
InstSize::Size64
}
}
}
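// For example, InstSize::from_bits(32u8) is Size32 while from_bits(33u8) is
// Size64, and from_is32(true) gives Size32 directly.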

File diff suppressed because it is too large


@@ -0,0 +1,752 @@
//! AArch64 ISA definitions: immediate constants.
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::ir::types::*;
use crate::ir::Type;
use crate::machinst::*;
use regalloc::RealRegUniverse;
use core::convert::TryFrom;
use std::string::String;
/// A signed, scaled 7-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm7Scaled {
/// The value.
pub value: i16,
/// The offset is multiplied by the size of this type.
pub scale_ty: Type,
}
impl SImm7Scaled {
/// Create a SImm7Scaled from a raw offset and the known scale type, if
/// possible.
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
assert!(scale_ty == I64 || scale_ty == I32);
let scale = scale_ty.bytes();
assert!(scale.is_power_of_two());
let scale = i64::from(scale);
let upper_limit = 63 * scale;
let lower_limit = -(64 * scale);
if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 {
Some(SImm7Scaled {
value: i16::try_from(value).unwrap(),
scale_ty,
})
} else {
None
}
}
/// Create a zero immediate of this format.
pub fn zero(scale_ty: Type) -> SImm7Scaled {
SImm7Scaled { value: 0, scale_ty }
}
/// Bits for encoding.
pub fn bits(&self) -> u32 {
let ty_bytes: i16 = self.scale_ty.bytes() as i16;
let scaled: i16 = self.value / ty_bytes;
assert!(scaled <= 63 && scaled >= -64);
let scaled: i8 = scaled as i8;
let encoded: u32 = scaled as u32;
encoded & 0x7f
}
}
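// Encoding sketch (illustrative): SImm7Scaled::maybe_from_i64(-16, I64) scales
// -16 by 8 to the field value -2, which `bits()` encodes as 0x7e; the valid
// I64 range is -512 ..= 504 in steps of 8.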
/// A signed 9-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm9 {
/// The value.
pub value: i16,
}
impl SImm9 {
/// Create a signed 9-bit offset from a full-range value, if possible.
pub fn maybe_from_i64(value: i64) -> Option<SImm9> {
if value >= -256 && value <= 255 {
Some(SImm9 {
value: value as i16,
})
} else {
None
}
}
/// Create a zero immediate of this format.
pub fn zero() -> SImm9 {
SImm9 { value: 0 }
}
/// Bits for encoding.
pub fn bits(&self) -> u32 {
(self.value as u32) & 0x1ff
}
}
/// An unsigned, scaled 12-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct UImm12Scaled {
/// The value.
pub value: u16,
/// The offset is multiplied by the size of this type.
pub scale_ty: Type,
}
impl UImm12Scaled {
/// Create a UImm12Scaled from a raw offset and the known scale type, if
/// possible.
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
let scale = scale_ty.bytes();
assert!(scale.is_power_of_two());
let scale = scale as i64;
let limit = 4095 * scale;
if value >= 0 && value <= limit && (value & (scale - 1)) == 0 {
Some(UImm12Scaled {
value: value as u16,
scale_ty,
})
} else {
None
}
}
/// Create a zero immediate of this format.
pub fn zero(scale_ty: Type) -> UImm12Scaled {
UImm12Scaled { value: 0, scale_ty }
}
/// Encoded bits.
pub fn bits(&self) -> u32 {
(self.value as u32 / self.scale_ty.bytes()) & 0xfff
}
}
/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
/// left by 0 or 12 places.
#[derive(Clone, Debug)]
pub struct Imm12 {
/// The immediate bits.
pub bits: u16,
/// Whether the immediate bits are shifted left by 12 or not.
pub shift12: bool,
}
impl Imm12 {
/// Compute an Imm12 from raw bits, if possible.
pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
if val == 0 {
Some(Imm12 {
bits: 0,
shift12: false,
})
} else if val <= 0xfff {
Some(Imm12 {
bits: val as u16,
shift12: false,
})
} else if val <= 0xfff_000 && (val & 0xfff == 0) {
Some(Imm12 {
bits: (val >> 12) as u16,
shift12: true,
})
} else {
None
}
}
/// Bits for 2-bit "shift" field in e.g. AddI.
pub fn shift_bits(&self) -> u32 {
if self.shift12 {
0b01
} else {
0b00
}
}
/// Bits for 12-bit "imm" field in e.g. AddI.
pub fn imm_bits(&self) -> u32 {
self.bits as u32
}
}
/// An immediate for logical instructions.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub struct ImmLogic {
/// The actual value.
value: u64,
/// `N` flag.
pub n: bool,
/// `R` field: rotate amount.
pub r: u8,
/// `S` field: element size and element bits.
pub s: u8,
}
impl ImmLogic {
/// Compute an ImmLogic from raw bits, if possible.
pub fn maybe_from_u64(value: u64, ty: Type) -> Option<ImmLogic> {
// Note: This function is a port of VIXL's Assembler::IsImmLogical.
if ty != I64 && ty != I32 {
return None;
}
let original_value = value;
let value = if ty == I32 {
// To handle 32-bit logical immediates, the very easiest thing is to repeat
// the input value twice to make a 64-bit word. The correct encoding of that
// as a logical immediate will also be the correct encoding of the 32-bit
// value.
// Avoid making the assumption that the most-significant 32 bits are zero by
// shifting the value left and duplicating it.
let value = value << 32;
value | value >> 32
} else {
value
};
// Logical immediates are encoded using parameters n, imm_s and imm_r using
// the following table:
//
// N imms immr size S R
// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
// 0 11110s xxxxxr 2 UInt(s) UInt(r)
// (s bits must not be all set)
//
// A pattern is constructed of size bits, where the least significant S+1 bits
// are set. The pattern is rotated right by R, and repeated across a 32 or
// 64-bit value, depending on destination register width.
//
// Put another way: the basic format of a logical immediate is a single
// contiguous stretch of 1 bits, repeated across the whole word at intervals
// given by a power of 2. To identify them quickly, we first locate the
// lowest stretch of 1 bits, then the next 1 bit above that; that combination
// is different for every logical immediate, so it gives us all the
// information we need to identify the only logical immediate that our input
// could be, and then we simply check if that's the value we actually have.
//
// (The rotation parameter does give the possibility of the stretch of 1 bits
// going 'round the end' of the word. To deal with that, we observe that in
// any situation where that happens the bitwise NOT of the value is also a
// valid logical immediate. So we simply invert the input whenever its low bit
// is set, and then we know that the rotated case can't arise.)
let (value, inverted) = if value & 1 == 1 {
(!value, true)
} else {
(value, false)
};
if value == 0 {
return None;
}
// The basic analysis idea: imagine our input word looks like this.
//
// 0011111000111110001111100011111000111110001111100011111000111110
// c b a
// |<--d-->|
//
// We find the lowest set bit (as an actual power-of-2 value, not its index)
// and call it a. Then we add a to our original number, which wipes out the
// bottommost stretch of set bits and replaces it with a 1 carried into the
// next zero bit. Then we look for the new lowest set bit, which is in
// position b, and subtract it, so now our number is just like the original
// but with the lowest stretch of set bits completely gone. Now we find the
// lowest set bit again, which is position c in the diagram above. Then we'll
// measure the distance d between bit positions a and c (using CLZ), and that
// tells us that the only valid logical immediate that could possibly be equal
// to this number is the one in which a stretch of bits running from a to just
// below b is replicated every d bits.
fn lowest_set_bit(value: u64) -> u64 {
let bit = value.trailing_zeros();
1u64.checked_shl(bit).unwrap_or(0)
}
let a = lowest_set_bit(value);
assert_ne!(0, a);
let value_plus_a = value.wrapping_add(a);
let b = lowest_set_bit(value_plus_a);
let value_plus_a_minus_b = value_plus_a - b;
let c = lowest_set_bit(value_plus_a_minus_b);
let (d, clz_a, out_n, mask) = if c != 0 {
// The general case, in which there is more than one stretch of set bits.
// Compute the repeat distance d, and set up a bitmask covering the basic
// unit of repetition (i.e. a word with the bottom d bits set). Also, in all
// of these cases the N bit of the output will be zero.
let clz_a = a.leading_zeros();
let clz_c = c.leading_zeros();
let d = clz_a - clz_c;
let mask = (1 << d) - 1;
(d, clz_a, 0, mask)
} else {
(64, a.leading_zeros(), 1, u64::max_value())
};
// If the repeat period d is not a power of two, it can't be encoded.
if !d.is_power_of_two() {
return None;
}
if ((b.wrapping_sub(a)) & !mask) != 0 {
// If the bit stretch (b - a) does not fit within the mask derived from the
// repeat period, then fail.
return None;
}
// The only possible option is b - a repeated every d bits. Now we're going to
// actually construct the valid logical immediate derived from that
// specification, and see if it equals our original input.
//
// To repeat a value every d bits, we multiply it by a number of the form
// (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
// be derived using a table lookup on CLZ(d).
const MULTIPLIERS: [u64; 6] = [
0x0000000000000001,
0x0000000100000001,
0x0001000100010001,
0x0101010101010101,
0x1111111111111111,
0x5555555555555555,
];
let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize];
let candidate = b.wrapping_sub(a) * multiplier;
if value != candidate {
// The candidate pattern doesn't match our input value, so fail.
return None;
}
// We have a match! This is a valid logical immediate, so now we have to
// construct the bits and pieces of the instruction encoding that generates
// it.
// Count the set bits in our basic stretch. The special case of clz(0) == -1
// makes the answer come out right for stretches that reach the very top of
// the word (e.g. numbers like 0xffffc00000000000).
let clz_b = if b == 0 {
u32::max_value() // -1
} else {
b.leading_zeros()
};
let s = clz_a.wrapping_sub(clz_b);
// Decide how many bits to rotate right by, to put the low bit of that basic
// stretch in position a.
let (s, r) = if inverted {
// If we inverted the input right at the start of this function, here's
// where we compensate: the number of set bits becomes the number of clear
// bits, and the rotation count is based on position b rather than position
// a (since b is the location of the 'lowest' 1 bit after inversion).
// Need wrapping for when clz_b is max_value() (for when b == 0).
(d - s, clz_b.wrapping_add(1) & (d - 1))
} else {
(s, (clz_a + 1) & (d - 1))
};
// Now we're done, except for having to encode the S output in such a way that
// it gives both the number of set bits and the length of the repeated
// segment. The s field is encoded like this:
//
// imms size S
// ssssss 64 UInt(ssssss)
// 0sssss 32 UInt(sssss)
// 10ssss 16 UInt(ssss)
// 110sss 8 UInt(sss)
// 1110ss 4 UInt(ss)
// 11110s 2 UInt(s)
//
// So we 'or' (2 * -d) with our computed s to form imms.
let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f;
debug_assert!(u8::try_from(r).is_ok());
debug_assert!(u8::try_from(s).is_ok());
Some(ImmLogic {
value: original_value,
n: out_n != 0,
r: r as u8,
s: s as u8,
})
}
pub fn from_raw(value: u64, n: bool, r: u8, s: u8) -> ImmLogic {
ImmLogic { n, r, s, value }
}
/// Returns bits ready for encoding: (N:1, R:6, S:6)
pub fn enc_bits(&self) -> u32 {
((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32)
}
/// Returns the value that this immediate represents.
pub fn value(&self) -> u64 {
self.value
}
/// Return an immediate for the bitwise-inverted value.
pub fn invert(&self) -> ImmLogic {
// For every ImmLogical immediate, the inverse can also be encoded.
Self::maybe_from_u64(!self.value, I64).unwrap()
}
}
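// Worked examples (values confirmed by the tests below): 1 encodes as
// (N=1, R=0, S=0), a single set bit in a 64-bit element, while the alternating
// pattern 0xaaaa_aaaa_aaaa_aaaa encodes as (N=0, R=1, S=60): a 2-bit element
// with one bit set, rotated by one.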
/// An immediate for shift instructions.
#[derive(Clone, Debug)]
pub struct ImmShift {
/// 6-bit shift amount.
pub imm: u8,
}
impl ImmShift {
/// Create an ImmShift from raw bits, if possible.
pub fn maybe_from_u64(val: u64) -> Option<ImmShift> {
if val < 64 {
Some(ImmShift { imm: val as u8 })
} else {
None
}
}
/// Get the immediate value.
pub fn value(&self) -> u8 {
self.imm
}
}
/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift.
#[derive(Clone, Copy, Debug)]
pub struct MoveWideConst {
/// The value.
pub bits: u16,
/// Result is `bits` shifted 16*shift bits to the left.
pub shift: u8,
}
impl MoveWideConst {
/// Construct a MoveWideConst from an arbitrary 64-bit constant if possible.
pub fn maybe_from_u64(value: u64) -> Option<MoveWideConst> {
let mask0 = 0x0000_0000_0000_ffffu64;
let mask1 = 0x0000_0000_ffff_0000u64;
let mask2 = 0x0000_ffff_0000_0000u64;
let mask3 = 0xffff_0000_0000_0000u64;
if value == (value & mask0) {
return Some(MoveWideConst {
bits: (value & mask0) as u16,
shift: 0,
});
}
if value == (value & mask1) {
return Some(MoveWideConst {
bits: ((value >> 16) & mask0) as u16,
shift: 1,
});
}
if value == (value & mask2) {
return Some(MoveWideConst {
bits: ((value >> 32) & mask0) as u16,
shift: 2,
});
}
if value == (value & mask3) {
return Some(MoveWideConst {
bits: ((value >> 48) & mask0) as u16,
shift: 3,
});
}
None
}
pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<MoveWideConst> {
let shift_enc = shift / 16;
if shift_enc > 3 {
None
} else {
Some(MoveWideConst {
bits: imm,
shift: shift_enc,
})
}
}
/// Returns the value that this constant represents.
pub fn value(&self) -> u64 {
(self.bits as u64) << (16 * self.shift)
}
}
impl ShowWithRRU for Imm12 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let shift = if self.shift12 { 12 } else { 0 };
let value = u32::from(self.bits) << shift;
format!("#{}", value)
}
}
impl ShowWithRRU for SImm7Scaled {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for SImm9 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for UImm12Scaled {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for ImmLogic {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value())
}
}
impl ShowWithRRU for ImmShift {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.imm)
}
}
impl ShowWithRRU for MoveWideConst {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
if self.shift == 0 {
format!("#{}", self.bits)
} else {
format!("#{}, LSL #{}", self.bits, self.shift * 16)
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn imm_logical_test() {
assert_eq!(None, ImmLogic::maybe_from_u64(0, I64));
assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64));
assert_eq!(
Some(ImmLogic {
value: 1,
n: true,
r: 0,
s: 0
}),
ImmLogic::maybe_from_u64(1, I64)
);
assert_eq!(
Some(ImmLogic {
value: 2,
n: true,
r: 63,
s: 0
}),
ImmLogic::maybe_from_u64(2, I64)
);
assert_eq!(None, ImmLogic::maybe_from_u64(5, I64));
assert_eq!(None, ImmLogic::maybe_from_u64(11, I64));
assert_eq!(
Some(ImmLogic {
value: 248,
n: true,
r: 61,
s: 4
}),
ImmLogic::maybe_from_u64(248, I64)
);
assert_eq!(None, ImmLogic::maybe_from_u64(249, I64));
assert_eq!(
Some(ImmLogic {
value: 1920,
n: true,
r: 57,
s: 3
}),
ImmLogic::maybe_from_u64(1920, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x7ffe,
n: true,
r: 63,
s: 13
}),
ImmLogic::maybe_from_u64(0x7ffe, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x30000,
n: true,
r: 48,
s: 1
}),
ImmLogic::maybe_from_u64(0x30000, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x100000,
n: true,
r: 44,
s: 0
}),
ImmLogic::maybe_from_u64(0x100000, I64)
);
assert_eq!(
Some(ImmLogic {
value: u64::max_value() - 1,
n: true,
r: 63,
s: 62
}),
ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0xaaaaaaaaaaaaaaaa,
n: false,
r: 1,
s: 60
}),
ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x8181818181818181,
n: false,
r: 1,
s: 49
}),
ImmLogic::maybe_from_u64(0x8181818181818181, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0xffc3ffc3ffc3ffc3,
n: false,
r: 10,
s: 43
}),
ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x100000001,
n: false,
r: 0,
s: 0
}),
ImmLogic::maybe_from_u64(0x100000001, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x1111111111111111,
n: false,
r: 0,
s: 56
}),
ImmLogic::maybe_from_u64(0x1111111111111111, I64)
);
for n in 0..2 {
let types = if n == 0 { vec![I64, I32] } else { vec![I64] };
for s in 0..64 {
for r in 0..64 {
let imm = get_logical_imm(n, s, r);
for &ty in &types {
match ImmLogic::maybe_from_u64(imm, ty) {
Some(ImmLogic { value, .. }) => {
assert_eq!(imm, value);
ImmLogic::maybe_from_u64(!value, ty).unwrap();
}
None => assert_eq!(0, imm),
};
}
}
}
}
}
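// Extra sketch tests (not part of the original commit) covering the shifted
// immediate formats defined above.
#[test]
fn imm12_and_movewide() {
// 0x345 fits directly; 0x345_000 needs the 12-bit left shift.
assert_eq!(Imm12::maybe_from_u64(0x345).unwrap().shift_bits(), 0b00);
assert_eq!(Imm12::maybe_from_u64(0x345_000).unwrap().shift_bits(), 0b01);
// 0x12345 has nonzero bits in both halves, so it is not encodable.
assert!(Imm12::maybe_from_u64(0x12345).is_none());
// A MOVZ immediate: 0x1_0000 is halfword 1 shifted left by 16 bits.
let mw = MoveWideConst::maybe_from_u64(0x1_0000).unwrap();
assert_eq!((mw.bits, mw.shift), (1, 1));
assert_eq!(mw.value(), 0x1_0000);
}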
// Repeat a value that has `width` bits across a 64-bit value.
fn repeat(value: u64, width: u64) -> u64 {
let mut result = value & ((1 << width) - 1);
let mut i = width;
while i < 64 {
result |= result << i;
i *= 2;
}
result
}
// Get the logical immediate, from the encoding N/R/S bits.
fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 {
// An integer is constructed from the n, imm_s and imm_r bits according to
// the following table:
//
// N imms immr size S R
// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
// 0 11110s xxxxxr 2 UInt(s) UInt(r)
// (s bits must not be all set)
//
// A pattern is constructed of size bits, where the least significant S+1
// bits are set. The pattern is rotated right by R, and repeated across a
// 64-bit value.
if n == 1 {
if s == 0x3f {
return 0;
}
let bits = (1u64 << (s + 1)) - 1;
bits.rotate_right(r)
} else {
if (s >> 1) == 0x1f {
return 0;
}
let mut width = 0x20;
while width >= 0x2 {
if (s & width) == 0 {
let mask = width - 1;
if (s & mask) == mask {
return 0;
}
let bits = (1u64 << ((s & mask) + 1)) - 1;
return repeat(bits.rotate_right(r & mask), width.into());
}
width >>= 1;
}
unreachable!();
}
}
}

File diff suppressed because it is too large


@@ -0,0 +1,270 @@
//! AArch64 ISA definitions: registers.
use crate::isa::aarch64::inst::InstSize;
use crate::machinst::*;
use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
use std::string::{String, ToString};
//=============================================================================
// Registers, the Universe thereof, and printing
#[rustfmt::skip]
const XREG_INDICES: [u8; 31] = [
// X0 - X7
32, 33, 34, 35, 36, 37, 38, 39,
// X8 - X14
40, 41, 42, 43, 44, 45, 46,
// X15
59,
// X16, X17
47, 48,
// X18
60,
// X19 - X28
49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
// X29
61,
// X30
62,
];
const ZERO_REG_INDEX: u8 = 63;
const SP_REG_INDEX: u8 = 64;
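// Example mapping: xreg(0) has universe index 32 (v0..v31 occupy 0..31), while
// x15 and x18 land at 59 and 60 in the non-allocatable tail, alongside fp (61),
// lr (62), xzr (63) and sp (64). See create_reg_universe() below.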
/// Get a reference to an X-register (integer register).
pub fn xreg(num: u8) -> Reg {
assert!(num < 31);
Reg::new_real(
RegClass::I64,
/* enc = */ num,
/* index = */ XREG_INDICES[num as usize],
)
}
/// Get a writable reference to an X-register.
pub fn writable_xreg(num: u8) -> Writable<Reg> {
Writable::from_reg(xreg(num))
}
/// Get a reference to a V-register (vector/FP register).
pub fn vreg(num: u8) -> Reg {
assert!(num < 32);
Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
}
/// Get a writable reference to a V-register.
pub fn writable_vreg(num: u8) -> Writable<Reg> {
Writable::from_reg(vreg(num))
}
/// Get a reference to the zero-register.
pub fn zero_reg() -> Reg {
// This should be the same as what xreg(31) returns, except that
// we use the special index into the register index space.
Reg::new_real(
RegClass::I64,
/* enc = */ 31,
/* index = */ ZERO_REG_INDEX,
)
}
/// Get a writable reference to the zero-register (this discards a result).
pub fn writable_zero_reg() -> Writable<Reg> {
Writable::from_reg(zero_reg())
}
/// Get a reference to the stack-pointer register.
pub fn stack_reg() -> Reg {
// XSP (stack) and XZR (zero) are logically different registers which have
// the same hardware encoding, and whose meaning, in real aarch64
// instructions, is context-dependent. For convenience of
// universe-construction and for correct printing, we make them be two
// different real registers.
Reg::new_real(
RegClass::I64,
/* enc = */ 31,
/* index = */ SP_REG_INDEX,
)
}
/// Get a writable reference to the stack-pointer register.
pub fn writable_stack_reg() -> Writable<Reg> {
Writable::from_reg(stack_reg())
}
/// Get a reference to the link register (x30).
pub fn link_reg() -> Reg {
xreg(30)
}
/// Get a writable reference to the link register.
pub fn writable_link_reg() -> Writable<Reg> {
Writable::from_reg(link_reg())
}
/// Get a reference to the frame pointer (x29).
pub fn fp_reg() -> Reg {
xreg(29)
}
/// Get a writable reference to the frame pointer.
pub fn writable_fp_reg() -> Writable<Reg> {
Writable::from_reg(fp_reg())
}
/// Get a reference to the "spill temp" register. This register is used to
/// compute the address of a spill slot when a direct offset addressing mode from
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
/// and reserve it for this purpose for simplicity; otherwise we need a
/// multi-stage analysis where we first determine how many spill slots we have,
/// then perhaps remove the reg from the pool and recompute regalloc.
pub fn spilltmp_reg() -> Reg {
xreg(15)
}
/// Get a writable reference to the spilltmp reg.
pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}
/// Create the register universe for AArch64.
pub fn create_reg_universe() -> RealRegUniverse {
let mut regs = vec![];
let mut allocable_by_class = [None; NUM_REG_CLASSES];
// Numbering scheme: we put V-regs first, then X-regs. The X-regs exclude
// several registers: x15 (the spill temporary), x18 (globally reserved for
// platform-specific purposes), x29 (frame pointer), x30 (link register), and
// x31 (stack pointer or zero register, depending on context).
let v_reg_base = 0u8; // in contiguous real-register index space
let v_reg_count = 32;
for i in 0u8..v_reg_count {
let reg = Reg::new_real(
RegClass::V128,
/* enc = */ i,
/* index = */ v_reg_base + i,
)
.to_real_reg();
let name = format!("v{}", i);
regs.push((reg, name));
}
let v_reg_last = v_reg_base + v_reg_count - 1;
// Add the X registers. N.B.: the order here must match the order implied
// by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
let x_reg_base = 32u8; // in contiguous real-register index space
let mut x_reg_count = 0;
for i in 0u8..32u8 {
// See above for excluded registers.
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 {
continue;
}
let reg = Reg::new_real(
RegClass::I64,
/* enc = */ i,
/* index = */ x_reg_base + x_reg_count,
)
.to_real_reg();
let name = format!("x{}", i);
regs.push((reg, name));
x_reg_count += 1;
}
let x_reg_last = x_reg_base + x_reg_count - 1;
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: x_reg_base as usize,
last: x_reg_last as usize,
suggested_scratch: Some(XREG_INDICES[13] as usize),
});
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
first: v_reg_base as usize,
last: v_reg_last as usize,
suggested_scratch: Some(/* V31: */ 31),
});
// Other regs, not available to the allocator.
let allocable = regs.len();
regs.push((xreg(15).to_real_reg(), "x15".to_string()));
regs.push((xreg(18).to_real_reg(), "x18".to_string()));
regs.push((fp_reg().to_real_reg(), "fp".to_string()));
regs.push((link_reg().to_real_reg(), "lr".to_string()));
regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
regs.push((stack_reg().to_real_reg(), "sp".to_string()));
// FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
// to 65, which is potentially inconvenient from a compiler performance
// standpoint. We could possibly drop back to 64 by "losing" a vector
// register in future.
// Assert sanity: the indices in the register structs must match their
// actual indices in the array.
for (i, reg) in regs.iter().enumerate() {
assert_eq!(i, reg.0.get_index());
}
RealRegUniverse {
regs,
allocable,
allocable_by_class,
}
}
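// Illustrative check, not part of this change: the universe holds 32 vector
// registers plus 27 allocable integer registers (x15, x18, x29, x30 and x31
// are withheld), then the six reserved entries, 65 real registers in total
// as the FIXME above notes.
#[cfg(test)]
#[test]
fn universe_shape() {
let universe = create_reg_universe();
assert_eq!(universe.allocable, 32 + 27);
assert_eq!(universe.regs.len(), 65);
}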
/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
/// its name at the 32-bit size.
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::I64 || !size.is32() {
// We can't do any better.
return s;
}
if reg.is_real() {
// Change (eg) "x42" into "w42"; the early return above already
// guarantees an I64-classed reg in a 32-bit role.
if s.starts_with("x") {
s = "w".to_string() + &s[1..];
}
} else {
// Virtual regs have no prefix-structured name; add a "w" suffix instead.
s.push('w');
}
s
}
/// Show a vector register when its use as a 32-bit or 64-bit float is known.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::V128 {
return s;
}
let prefix = if size.is32() { "s" } else { "d" };
// Only real registers carry a "v"-prefixed name; guard so a virtual
// register's name is not mangled.
if s.starts_with("v") {
s.replace_range(0..1, prefix);
}
s
}
/// Show a vector register used in a scalar context.
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::V128 {
// We can't do any better.
return s;
}
if reg.is_real() {
// Change (eg) "v0" into "d0"; the early return above already
// guarantees a V128-classed reg.
if s.starts_with("v") {
s.replace_range(0..1, "d");
}
} else {
// Virtual regs have no prefix-structured name; add a "d" suffix instead.
s.push('d');
}
s
}
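// Usage sketch, not part of this change (assumes the `InstSize::Size32`
// variant defined elsewhere in this backend): real registers are renamed by
// prefix, so an integer register prints as "w5" in a 32-bit role and a
// vector register as "s3"/"d3" in scalar float roles.
#[cfg(test)]
#[test]
fn sized_register_names() {
let rru = create_reg_universe();
assert_eq!(show_ireg_sized(xreg(5), Some(&rru), InstSize::Size32), "w5");
assert_eq!(show_freg_sized(vreg(3), Some(&rru), InstSize::Size32), "s3");
assert_eq!(show_vreg_scalar(vreg(3), Some(&rru)), "d3");
}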

File diff suppressed because it is too large


@@ -0,0 +1,220 @@
//! ARM 64-bit Instruction Set Architecture.
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{
compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode,
};
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use regalloc::RealRegUniverse;
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
// New backend:
mod abi;
mod inst;
mod lower;
use inst::create_reg_universe;
/// An AArch64 backend.
pub struct AArch64Backend {
triple: Triple,
flags: settings::Flags,
}
impl AArch64Backend {
/// Create a new AArch64 backend with the given (shared) flags.
pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
AArch64Backend { triple, flags }
}
fn compile_vcode(&self, func: &Function, flags: &settings::Flags) -> VCode<inst::Inst> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::AArch64ABIBody::new(func));
compile::compile::<AArch64Backend>(func, self, abi, flags)
}
}
impl MachBackend for AArch64Backend {
fn compile_function(
&self,
func: &Function,
want_disasm: bool,
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags);
let sections = vcode.emit();
let frame_size = vcode.frame_size();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe())))
} else {
None
};
Ok(MachCompileResult {
sections,
frame_size,
disasm,
})
}
fn name(&self) -> &'static str {
"aarch64"
}
fn triple(&self) -> Triple {
self.triple.clone()
}
fn flags(&self) -> &settings::Flags {
&self.flags
}
fn reg_universe(&self) -> RealRegUniverse {
create_reg_universe()
}
}
/// Create a new `isa::Builder`.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
IsaBuilder {
triple,
setup: settings::builder(),
constructor: |triple, shared_flags, _| {
let backend = AArch64Backend::new_with_flags(triple, shared_flags);
Box::new(TargetIsaAdapter::new(backend))
},
}
}
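// Usage sketch, not part of this change (assumes the existing
// `isa::Builder::finish(shared_flags)` entry point): looking up an aarch64
// triple yields this builder, and finishing it produces a `TargetIsa`
// adapter around the new backend.
#[cfg(test)]
#[test]
fn builder_constructs_backend() {
use core::str::FromStr;
let builder = isa_builder(Triple::from_str("aarch64").unwrap());
let _isa = builder.finish(settings::Flags::new(settings::builder()));
}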
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
use crate::settings;
use crate::settings::Configurable;
use core::str::FromStr;
use target_lexicon::Triple;
#[test]
fn test_compile_function() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().return_(&[v1]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let sections = backend.compile_function(&mut func, false).unwrap().sections;
let code = &sections.sections[0].data;
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, #0x1234
// add w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden);
}
#[test]
fn test_branch_lowering() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().brnz(v1, bb1, &[]);
pos.ins().jump(bb2, &[]);
pos.insert_block(bb1);
pos.ins().brnz(v1, bb2, &[]);
pos.ins().jump(bb3, &[]);
pos.insert_block(bb2);
let v2 = pos.ins().iadd(v1, v0);
pos.ins().brnz(v2, bb2, &[]);
pos.ins().jump(bb1, &[]);
pos.insert_block(bb3);
let v3 = pos.ins().isub(v1, v0);
pos.ins().return_(&[v3]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = &result.sections.sections[0].data;
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, x0
// mov x0, #0x1234
// add w1, w1, w0
// mov w2, w1
// cbz x2, ...
// mov w2, w1
// cbz x2, ...
// sub w0, w1, w0
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
// add w2, w1, w0
// mov w2, w2
// cbnz x2, ... <---- compound branch (cond / uncond)
// b ... <----
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
];
assert_eq!(code, &golden);
}
}


@@ -1,31 +0,0 @@
//! ARM 64 ABI implementation.
use super::registers::{FPR, GPR};
use crate::ir;
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use crate::settings as shared_settings;
use alloc::borrow::Cow;
/// Legalize `sig`.
pub fn legalize_signature(
_sig: &mut Cow<ir::Signature>,
_flags: &shared_settings::Flags,
_current: bool,
) {
unimplemented!()
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() {
GPR
} else {
FPR
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}


@@ -1,8 +0,0 @@
//! Emitting binary ARM64 machine code.
use crate::binemit::{bad_encoding, CodeSink};
use crate::ir::{Function, Inst};
use crate::isa::TargetIsa;
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));


@@ -1,10 +0,0 @@
//! Encoding tables for ARM64 ISA.
use crate::ir;
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::RecipeSizing;
include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));


@@ -1,132 +0,0 @@
//! ARM 64-bit Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use core::fmt;
use target_lexicon::Triple;
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
}
/// Get an ISA builder for creating ARM64 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<dyn TargetIsa> {
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"arm64"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
&enc_tables::LEVEL1_A64[..],
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
abi::legalize_signature(sig, &self.shared_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut dyn CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink, self)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink, self)
}
fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
ir::condcodes::IntCC::UnsignedLessThan
}
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}


@@ -1,39 +0,0 @@
//! ARM64 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs"));
#[cfg(test)]
mod tests {
use super::INFO;
use crate::isa::RegUnit;
use alloc::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("x0"), Some(0));
assert_eq!(INFO.parse_regunit("x31"), Some(31));
assert_eq!(INFO.parse_regunit("v0"), Some(32));
assert_eq!(INFO.parse_regunit("v31"), Some(63));
assert_eq!(INFO.parse_regunit("x32"), None);
assert_eq!(INFO.parse_regunit("v32"), None);
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%x0");
assert_eq!(uname(1), "%x1");
assert_eq!(uname(31), "%x31");
assert_eq!(uname(32), "%v0");
assert_eq!(uname(33), "%v1");
assert_eq!(uname(63), "%v31");
assert_eq!(uname(64), "%nzcv");
assert_eq!(uname(65), "%INVALID65");
}
}


@@ -1,9 +0,0 @@
//! ARM64 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/arm64/mod.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));


@@ -48,6 +48,7 @@ pub use crate::isa::call_conv::CallConv;
pub use crate::isa::constraints::{
BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
};
pub use crate::isa::enc_tables::Encodings;
pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
@@ -55,9 +56,9 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
use crate::isa::enc_tables::Encodings;
#[cfg(feature = "unwind")]
use crate::isa::fde::RegisterMappingError;
#[cfg(feature = "unwind")]
use crate::machinst::MachBackend;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
@@ -83,7 +84,7 @@ pub mod fde;
mod arm32;
#[cfg(feature = "arm64")]
-mod arm64;
+mod aarch64;
mod call_conv;
mod constraints;
@@ -92,6 +93,9 @@ mod encoding;
pub mod registers;
mod stack;
#[cfg(test)]
mod test_utils;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::SupportDisabled)` if not enabled.
macro_rules! isa_builder {
@@ -116,7 +120,7 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
isa_builder!(x86, "x86", triple)
}
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
-Architecture::Aarch64 { .. } => isa_builder!(arm64, "arm64", triple),
+Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
_ => Err(LookupError::Unsupported),
}
}
@@ -402,6 +406,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
// No-op by default
Ok(())
}
/// Get the new-style MachBackend, if this is an adapter around one.
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
None
}
}
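// Usage sketch, not part of this change (the free function is hypothetical,
// for illustration): callers probe for the new-style backend and fall back
// to the legacy TargetIsa emission path when it is absent.
fn try_mach_compile(
isa: &dyn TargetIsa,
func: &ir::Function,
want_disasm: bool,
) -> Option<CodegenResult<crate::machinst::MachCompileResult>> {
isa.get_mach_backend()
.map(|backend| backend.compile_function(func, want_disasm))
}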
impl Debug for &dyn TargetIsa {


@@ -0,0 +1,88 @@
// This is unused when no platforms with the new backend are enabled.
#![allow(dead_code)]
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::Value;
use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode};
use crate::isa::TargetIsa;
use alloc::vec::Vec;
use std::string::String;
pub struct TestCodeSink {
bytes: Vec<u8>,
}
impl TestCodeSink {
/// Create a new TestCodeSink.
pub fn new() -> TestCodeSink {
TestCodeSink { bytes: vec![] }
}
/// Return the code emitted to this sink as a hex string.
pub fn stringify(&self) -> String {
// This is pretty lame, but whatever ..
use std::fmt::Write;
let mut s = String::with_capacity(self.bytes.len() * 2);
for b in &self.bytes {
write!(&mut s, "{:02X}", b).unwrap();
}
s
}
}
impl CodeSink for TestCodeSink {
fn offset(&self) -> CodeOffset {
self.bytes.len() as CodeOffset
}
fn put1(&mut self, x: u8) {
self.bytes.push(x);
}
fn put2(&mut self, x: u16) {
self.bytes.push((x >> 0) as u8);
self.bytes.push((x >> 8) as u8);
}
fn put4(&mut self, mut x: u32) {
for _ in 0..4 {
self.bytes.push(x as u8);
x >>= 8;
}
}
fn put8(&mut self, mut x: u64) {
for _ in 0..8 {
self.bytes.push(x as u8);
x >>= 8;
}
}
fn reloc_block(&mut self, _rel: Reloc, _block_offset: CodeOffset) {}
fn reloc_external(
&mut self,
_srcloc: SourceLoc,
_rel: Reloc,
_name: &ExternalName,
_addend: Addend,
) {
}
fn reloc_constant(&mut self, _rel: Reloc, _constant_offset: ConstantOffset) {}
fn reloc_jt(&mut self, _rel: Reloc, _jt: JumpTable) {}
fn trap(&mut self, _code: TrapCode, _srcloc: SourceLoc) {}
fn begin_jumptables(&mut self) {}
fn begin_rodata(&mut self) {}
fn end_codegen(&mut self) {}
fn add_stackmap(&mut self, _val_list: &[Value], _func: &Function, _isa: &dyn TargetIsa) {}
fn add_call_site(&mut self, _opcode: Opcode, _srcloc: SourceLoc) {}
}