Add a work-in-progress backend for x86_64 using the new instruction selection.

Most of the work is credited to Julian Seward.

Co-authored-by: Julian Seward <jseward@acm.org>
Co-authored-by: Chris Fallin <cfallin@mozilla.com>
@@ -58,10 +58,12 @@ x86 = []
arm32 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.

# Option to enable all architectures.
all-arch = [
    "x86",
    "x64",
    "arm32",
    "arm64",
    "riscv"
]
@@ -3,6 +3,12 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};

pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
    let mut settings = SettingGroupBuilder::new("x86");

    settings.add_bool(
        "use_new_backend",
        "Whether to use the new codegen backend using the new isel",
        false,
    );

    // CPUID.01H:ECX
    let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
    let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
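
As a minimal sketch of how a caller might branch on this flag once the settings group is generated (the `use_new_backend()` accessor name is an assumption based on Cranelift's usual bool-setting codegen; it is not shown in this diff):

```rust
// Hypothetical: choose a backend label from the new boolean flag. The
// generated settings struct would expose `use_new_backend()` for the
// `add_bool("use_new_backend", ...)` call above.
fn backend_name(use_new_backend: bool) -> &'static str {
    if use_new_backend {
        "x64 (new isel)"
    } else {
        "x86 (legacy recipes)"
    }
}

fn main() {
    assert_eq!(backend_name(true), "x64 (new isel)");
    assert_eq!(backend_name(false), "x86 (legacy recipes)");
}
```
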
@@ -77,6 +77,9 @@ mod riscv;

#[cfg(feature = "x86")]
mod x86;

#[cfg(feature = "x64")]
mod x64;

#[cfg(feature = "arm32")]
mod arm32;
cranelift/codegen/src/isa/x64/abi.rs (new file, 457 lines)
@@ -0,0 +1,457 @@
//! Implementation of the standard x64 ABI.

use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};

use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
use crate::isa::{self, x64::inst::*};
use crate::machinst::*;
use crate::settings;

use args::*;

#[derive(Clone, Debug)]
enum ABIArg {
    Reg(RealReg),
    _Stack,
}

#[derive(Clone, Debug)]
enum ABIRet {
    Reg(RealReg),
    _Stack,
}

pub(crate) struct X64ABIBody {
    args: Vec<ABIArg>,
    rets: Vec<ABIRet>,

    /// Offsets to each stack slot.
    _stack_slots: Vec<usize>,

    /// Total stack size of all the stack slots.
    stack_slots_size: usize,

    /// Clobbered registers, as indicated by regalloc.
    clobbered: Set<Writable<RealReg>>,

    /// Total number of spill slots, as indicated by regalloc.
    num_spill_slots: Option<usize>,

    /// Calculated while creating the prologue, and used when creating the epilogue. Amount by
    /// which RSP is adjusted downwards to allocate the spill area.
    frame_size_bytes: Option<usize>,

    call_conv: isa::CallConv,

    /// The settings controlling this function's compilation.
    flags: settings::Flags,
}

fn in_int_reg(ty: types::Type) -> bool {
    match ty {
        types::I8
        | types::I16
        | types::I32
        | types::I64
        | types::B1
        | types::B8
        | types::B16
        | types::B32
        | types::B64 => true,
        _ => false,
    }
}

fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::rdi()),
        1 => Some(regs::rsi()),
        2 => Some(regs::rdx()),
        3 => Some(regs::rcx()),
        4 => Some(regs::r8()),
        5 => Some(regs::r9()),
        _ => None,
    }
}

fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::rax()),
        1 => Some(regs::rdx()),
        _ => None,
    }
}

fn is_callee_save_systemv(r: RealReg) -> bool {
    use regs::*;
    match r.get_class() {
        RegClass::I64 => match r.get_hw_encoding() as u8 {
            ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
            _ => false,
        },
        _ => unimplemented!(),
    }
}

fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
    regs.into_iter()
        .filter(|r| is_callee_save_systemv(r.to_reg()))
        .collect()
}
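
These helpers hard-code the SysV AMD64 calling convention. A standalone illustration (not part of this commit) of the argument-register order and callee-saved set they encode:

```rust
// SysV AMD64: integer args go in rdi, rsi, rdx, rcx, r8, r9, in that order;
// rbx, rbp, r12..r15 are callee-saved.
fn sysv_int_arg_name(idx: usize) -> Option<&'static str> {
    ["rdi", "rsi", "rdx", "rcx", "r8", "r9"].get(idx).copied()
}

fn sysv_is_callee_saved(name: &str) -> bool {
    matches!(name, "rbx" | "rbp" | "r12" | "r13" | "r14" | "r15")
}

fn main() {
    // The 7th integer argument (index 6) would go on the stack, which the
    // new backend does not handle yet (it hits `unimplemented!`).
    assert_eq!(sysv_int_arg_name(6), None);
    assert!(sysv_is_callee_saved("rbx"));
    assert!(!sysv_is_callee_saved("rax"));
}
```
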

impl X64ABIBody {
    /// Create a new body ABI instance.
    pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self {
        // Compute args and retvals from signature.
        let mut args = vec![];
        let mut next_int_arg = 0;
        for param in &f.signature.params {
            match param.purpose {
                ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => {
                    // `VMContext` is `r14` in Baldrdash.
                    args.push(ABIArg::Reg(regs::r14().to_real_reg()));
                }

                ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {
                    if in_int_reg(param.value_type) {
                        if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) {
                            args.push(ABIArg::Reg(reg.to_real_reg()));
                        } else {
                            unimplemented!("passing arg on the stack");
                        }
                        next_int_arg += 1;
                    } else {
                        unimplemented!("non-int normal register")
                    }
                }

                _ => unimplemented!("other parameter purposes"),
            }
        }

        let mut rets = vec![];
        let mut next_int_retval = 0;
        for ret in &f.signature.returns {
            match ret.purpose {
                ir::ArgumentPurpose::Normal => {
                    if in_int_reg(ret.value_type) {
                        if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) {
                            rets.push(ABIRet::Reg(reg.to_real_reg()));
                        } else {
                            unimplemented!("passing return on the stack");
                        }
                        next_int_retval += 1;
                    } else {
                        unimplemented!("returning non-integer normal value");
                    }
                }

                _ => {
                    unimplemented!("non-normal argument purpose");
                }
            }
        }

        // Compute stackslot locations and total stackslot size.
        let mut stack_offset: usize = 0;
        let mut _stack_slots = vec![];
        for (stackslot, data) in f.stack_slots.iter() {
            let off = stack_offset;
            stack_offset += data.size as usize;

            // Align to 8 bytes.
            stack_offset = (stack_offset + 7) & !7usize;

            debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len());
            _stack_slots.push(off);
        }

        Self {
            args,
            rets,
            _stack_slots,
            stack_slots_size: stack_offset,
            clobbered: Set::empty(),
            num_spill_slots: None,
            frame_size_bytes: None,
            call_conv: f.signature.call_conv.clone(),
            flags,
        }
    }
}
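
A standalone check (not part of this commit) of the align-up idiom used in the stack-slot loop above: `(x + 7) & !7` rounds x up to the next multiple of 8.

```rust
// Round up to the next multiple of 8, as the stack-slot layout loop does.
fn align_up_8(x: usize) -> usize {
    (x + 7) & !7usize
}

fn main() {
    assert_eq!(align_up_8(0), 0);
    assert_eq!(align_up_8(1), 8);
    assert_eq!(align_up_8(8), 8);
    assert_eq!(align_up_8(13), 16);
}
```
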

impl ABIBody for X64ABIBody {
    type I = Inst;

    fn flags(&self) -> &settings::Flags {
        &self.flags
    }

    fn num_args(&self) -> usize {
        unimplemented!()
    }

    fn num_retvals(&self) -> usize {
        unimplemented!()
    }

    fn num_stackslots(&self) -> usize {
        unimplemented!()
    }

    fn liveins(&self) -> Set<RealReg> {
        let mut set: Set<RealReg> = Set::empty();
        for arg in &self.args {
            if let &ABIArg::Reg(r) = arg {
                set.insert(r);
            }
        }
        set
    }

    fn liveouts(&self) -> Set<RealReg> {
        let mut set: Set<RealReg> = Set::empty();
        for ret in &self.rets {
            if let &ABIRet::Reg(r) = ret {
                set.insert(r);
            }
        }
        set
    }

    fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable<Reg>) -> Inst {
        match &self.args[idx] {
            ABIArg::Reg(from_reg) => {
                if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 {
                    // TODO: do we need a sign extension if it's I32?
                    return Inst::mov_r_r(/* is64 = */ true, from_reg.to_reg(), to_reg);
                }
                unimplemented!("moving from non-int arg to vreg");
            }
            ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"),
        }
    }

    fn gen_copy_reg_to_retval(
        &self,
        idx: usize,
        from_reg: Writable<Reg>,
        ext: ArgumentExtension,
    ) -> Vec<Inst> {
        match ext {
            ArgumentExtension::None => {}
            _ => unimplemented!(
                "unimplemented argument extension {:?} is required for baldrdash",
                ext
            ),
        };

        let mut ret = Vec::new();
        match &self.rets[idx] {
            ABIRet::Reg(to_reg) => {
                if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 {
                    ret.push(Inst::mov_r_r(
                        /* is64 = */ true,
                        from_reg.to_reg(),
                        Writable::<Reg>::from_reg(to_reg.to_reg()),
                    ))
                } else {
                    unimplemented!("moving from vreg to non-int return value");
                }
            }

            ABIRet::_Stack => {
                unimplemented!("moving from vreg to stack return value");
            }
        }

        ret
    }

    fn gen_ret(&self) -> Inst {
        Inst::ret()
    }

    fn gen_epilogue_placeholder(&self) -> Inst {
        Inst::epilogue_placeholder()
    }

    fn set_num_spillslots(&mut self, slots: usize) {
        self.num_spill_slots = Some(slots);
    }

    fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
        self.clobbered = clobbered;
    }

    fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable<Reg>) -> Inst {
        unimplemented!()
    }

    fn load_stackslot(
        &self,
        _slot: StackSlot,
        _offset: u32,
        _ty: Type,
        _into_reg: Writable<Reg>,
    ) -> Inst {
        unimplemented!("load_stackslot")
    }

    fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst {
        unimplemented!("store_stackslot")
    }

    fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
        unimplemented!("load_spillslot")
    }

    fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
        unimplemented!("store_spillslot")
    }

    fn gen_prologue(&mut self) -> Vec<Inst> {
        let r_rsp = regs::rsp();

        let mut insts = vec![];

        // Baldrdash generates its own prologue sequence, so we don't have to.
        if !self.call_conv.extends_baldrdash() {
            let r_rbp = regs::rbp();
            let w_rbp = Writable::<Reg>::from_reg(r_rbp);

            // The "traditional" pre-preamble.
            // RSP before the call will be 0 % 16. So here, it is 8 % 16.
            insts.push(Inst::push64(RMI::reg(r_rbp)));
            // RSP is now 0 % 16.
            insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
        }

        // Save callee-saved registers that we trash. Keep track of how much space we've used, so
        // as to know what we have to do to get the base of the spill area 0 % 16.
        let mut callee_saved_used = 0;
        let clobbered = get_callee_saves(self.clobbered.to_vec());
        for reg in clobbered {
            let r_reg = reg.to_reg();
            match r_reg.get_class() {
                RegClass::I64 => {
                    insts.push(Inst::push64(RMI::reg(r_reg.to_reg())));
                    callee_saved_used += 8;
                }
                _ => unimplemented!(),
            }
        }

        let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap();
        if self.call_conv.extends_baldrdash() {
            // Baldrdash expects the stack to take at least the number of words set in
            // baldrdash_prologue_words; count them here.
            debug_assert!(
                !self.flags.enable_probestack(),
                "baldrdash does not expect cranelift to emit stack probes"
            );
            total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8;
        }

        debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8);
        let frame_size = total_stacksize + callee_saved_used % 16;

        // Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body.
        let frame_size = (frame_size + 15) & !15;
        if frame_size > 0x7FFF_FFFF {
            unimplemented!("gen_prologue(x86): total_stacksize >= 2G");
        }

        if !self.call_conv.extends_baldrdash() {
            // Explicitly allocate the frame.
            let w_rsp = Writable::<Reg>::from_reg(r_rsp);
            if frame_size > 0 {
                insts.push(Inst::alu_rmi_r(
                    true,
                    RMI_R_Op::Sub,
                    RMI::imm(frame_size as u32),
                    w_rsp,
                ));
            }
        }

        // Stash this value. We'll need it for the epilogue.
        debug_assert!(self.frame_size_bytes.is_none());
        self.frame_size_bytes = Some(frame_size);

        insts
    }
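
A worked example (not part of this commit) of the prologue's frame-size math: stack slots plus the 8-byte spill slots, plus padding so RSP stays 16-byte aligned in the function body.

```rust
// Mirror of gen_prologue's sizing: an odd number of 8-byte callee-saved
// pushes leaves RSP at 8 % 16, so `callee_saved_used % 16` absorbs that
// parity before rounding the whole frame up to 16 bytes.
fn frame_size(stack_slots_size: usize, num_spill_slots: usize, callee_saved_used: usize) -> usize {
    let total_stacksize = stack_slots_size + 8 * num_spill_slots;
    let frame_size = total_stacksize + callee_saved_used % 16;
    (frame_size + 15) & !15
}

fn main() {
    // 20 bytes of stack slots, 3 spill slots, one callee-saved push (8 bytes):
    // 20 + 24 = 44, plus 8 of push parity = 52, rounded up to 64.
    assert_eq!(frame_size(20, 3, 8), 64);
}
```
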

    fn gen_epilogue(&self) -> Vec<Inst> {
        let mut insts = vec![];

        // Undo what we did in the prologue.

        // Clear the spill area and the 16-alignment padding below it.
        if !self.call_conv.extends_baldrdash() {
            let frame_size = self.frame_size_bytes.unwrap();
            if frame_size > 0 {
                let r_rsp = regs::rsp();
                let w_rsp = Writable::<Reg>::from_reg(r_rsp);

                insts.push(Inst::alu_rmi_r(
                    true,
                    RMI_R_Op::Add,
                    RMI::imm(frame_size as u32),
                    w_rsp,
                ));
            }
        }

        // Restore regs.
        let clobbered = get_callee_saves(self.clobbered.to_vec());
        for w_real_reg in clobbered.into_iter().rev() {
            match w_real_reg.to_reg().get_class() {
                RegClass::I64 => {
                    // TODO: make these conversion sequences less cumbersome.
                    insts.push(Inst::pop64(Writable::<Reg>::from_reg(
                        w_real_reg.to_reg().to_reg(),
                    )))
                }
                _ => unimplemented!(),
            }
        }

        // Baldrdash generates its own preamble.
        if !self.call_conv.extends_baldrdash() {
            let r_rbp = regs::rbp();
            let w_rbp = Writable::<Reg>::from_reg(r_rbp);

            // Undo the "traditional" pre-preamble.
            // RSP before the call will be 0 % 16. So here, it is 8 % 16.
            insts.push(Inst::pop64(w_rbp));
            insts.push(Inst::ret());
        }

        insts
    }

    fn frame_size(&self) -> u32 {
        self.frame_size_bytes
            .expect("frame size not computed before prologue generation") as u32
    }

    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
        // We allocate in terms of 8-byte slots.
        match (rc, ty) {
            (RegClass::I64, _) => 1,
            (RegClass::V128, F32) | (RegClass::V128, F64) => 1,
            (RegClass::V128, _) => 2,
            _ => panic!("Unexpected register class!"),
        }
    }

    fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
        unimplemented!()
    }

    fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
        unimplemented!()
    }
}
cranelift/codegen/src/isa/x64/inst/args.rs (new file, 451 lines)
@@ -0,0 +1,451 @@
//! Instruction operand sub-components (aka "parts"): definitions and printing.

use std::fmt;
use std::string::{String, ToString};

use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};

use crate::binemit::CodeOffset;
use crate::machinst::*;

use super::regs::show_ireg_sized;

/// A memory address. These denote a 64-bit value only.
#[derive(Clone)]
pub(crate) enum Addr {
    /// Immediate (sign-extended) plus a register.
    IR { simm32: u32, base: Reg },

    /// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
    IRRS {
        simm32: u32,
        base: Reg,
        index: Reg,
        shift: u8, /* 0 .. 3 only */
    },
}

impl Addr {
    // Constructors.

    pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
        debug_assert!(base.get_class() == RegClass::I64);
        Self::IR { simm32, base }
    }

    pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
        debug_assert!(base.get_class() == RegClass::I64);
        debug_assert!(index.get_class() == RegClass::I64);
        debug_assert!(shift <= 3);
        Addr::IRRS {
            simm32,
            base,
            index,
            shift,
        }
    }

    /// Add the regs mentioned by `self` to `collector`.
    pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
        match self {
            Addr::IR { simm32: _, base } => {
                collector.add_use(*base);
            }
            Addr::IRRS {
                simm32: _,
                base,
                index,
                shift: _,
            } => {
                collector.add_use(*base);
                collector.add_use(*index);
            }
        }
    }
}
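
The two `Addr` forms correspond to x86's `disp(base)` and `disp(base, index, scale)` addressing modes. A standalone sketch (not part of this commit) of the effective address each form denotes:

```rust
// Mirrors AT&T syntax: `disp(base)` and `disp(base, index, 1 << shift)`.
fn ea_ir(simm32: u32, base: u64) -> u64 {
    // The 32-bit displacement is sign-extended to 64 bits.
    base.wrapping_add(simm32 as i32 as i64 as u64)
}

fn ea_irrs(simm32: u32, base: u64, index: u64, shift: u8) -> u64 {
    assert!(shift <= 3);
    ea_ir(simm32, base).wrapping_add(index << shift)
}

fn main() {
    // -8(%rbp) with rbp = 0x1000:
    assert_eq!(ea_ir(-8i32 as u32, 0x1000), 0xFF8);
    // 16(%rax, %rcx, 8) with rax = 0x2000, rcx = 3:
    assert_eq!(ea_irrs(16, 0x2000, 3, 3), 0x2000 + 16 + 24);
}
```
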

impl ShowWithRRU for Addr {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        match self {
            Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)),
            Addr::IRRS {
                simm32,
                base,
                index,
                shift,
            } => format!(
                "{}({},{},{})",
                *simm32 as i32,
                base.show_rru(mb_rru),
                index.show_rru(mb_rru),
                1 << shift
            ),
        }
    }
}

/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
/// the lower 8 or 16 bits of `simm32` are relevant. In the 64-bit case, the value denoted by
/// `simm32` is its sign-extension out to 64 bits.
#[derive(Clone)]
pub(crate) enum RMI {
    R { reg: Reg },
    M { addr: Addr },
    I { simm32: u32 },
}

impl RMI {
    // Constructors.

    pub(crate) fn reg(reg: Reg) -> RMI {
        debug_assert!(reg.get_class() == RegClass::I64);
        RMI::R { reg }
    }
    pub(crate) fn mem(addr: Addr) -> RMI {
        RMI::M { addr }
    }
    pub(crate) fn imm(simm32: u32) -> RMI {
        RMI::I { simm32 }
    }

    /// Add the regs mentioned by `self` to `collector`.
    pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
        match self {
            RMI::R { reg } => collector.add_use(*reg),
            RMI::M { addr } => addr.get_regs_as_uses(collector),
            RMI::I { simm32: _ } => {}
        }
    }
}

impl ShowWithRRU for RMI {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        self.show_rru_sized(mb_rru, 8)
    }

    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
        match self {
            RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size),
            RMI::M { addr } => addr.show_rru(mb_rru),
            RMI::I { simm32 } => format!("${}", *simm32 as i32),
        }
    }
}

/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32 or 64 bit value.
#[derive(Clone)]
pub(crate) enum RM {
    R { reg: Reg },
    M { addr: Addr },
}

impl RM {
    // Constructors.

    pub(crate) fn reg(reg: Reg) -> Self {
        debug_assert!(reg.get_class() == RegClass::I64);
        RM::R { reg }
    }

    pub(crate) fn mem(addr: Addr) -> Self {
        RM::M { addr }
    }

    /// Add the regs mentioned by `self` to `collector`.
    pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
        match self {
            RM::R { reg } => collector.add_use(*reg),
            RM::M { addr } => addr.get_regs_as_uses(collector),
        }
    }
}

impl ShowWithRRU for RM {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        self.show_rru_sized(mb_rru, 8)
    }

    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
        match self {
            RM::R { reg } => show_ireg_sized(*reg, mb_rru, size),
            RM::M { addr } => addr.show_rru(mb_rru),
        }
    }
}

/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
#[derive(Clone, PartialEq)]
pub enum RMI_R_Op {
    Add,
    Sub,
    And,
    Or,
    Xor,
    /// The signless, non-extending (N x N -> N, for N in {32, 64}) variant.
    Mul,
}

impl RMI_R_Op {
    pub(crate) fn to_string(&self) -> String {
        match self {
            RMI_R_Op::Add => "add".to_string(),
            RMI_R_Op::Sub => "sub".to_string(),
            RMI_R_Op::And => "and".to_string(),
            RMI_R_Op::Or => "or".to_string(),
            RMI_R_Op::Xor => "xor".to_string(),
            RMI_R_Op::Mul => "imul".to_string(),
        }
    }
}

impl fmt::Debug for RMI_R_Op {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.to_string())
    }
}

/// These indicate ways of extending (widening) a value, using the Intel naming:
/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
#[derive(Clone, PartialEq)]
pub enum ExtMode {
    /// Byte -> Longword.
    BL,
    /// Byte -> Quadword.
    BQ,
    /// Word -> Longword.
    WL,
    /// Word -> Quadword.
    WQ,
    /// Longword -> Quadword.
    LQ,
}

impl ExtMode {
    pub(crate) fn to_string(&self) -> String {
        match self {
            ExtMode::BL => "bl".to_string(),
            ExtMode::BQ => "bq".to_string(),
            ExtMode::WL => "wl".to_string(),
            ExtMode::WQ => "wq".to_string(),
            ExtMode::LQ => "lq".to_string(),
        }
    }

    pub(crate) fn dst_size(&self) -> u8 {
        match self {
            ExtMode::BL => 4,
            ExtMode::BQ => 8,
            ExtMode::WL => 4,
            ExtMode::WQ => 8,
            ExtMode::LQ => 8,
        }
    }
}

impl fmt::Debug for ExtMode {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.to_string())
    }
}

/// These indicate the form of a scalar shift: left, signed right, unsigned right.
#[derive(Clone)]
pub enum ShiftKind {
    Left,
    RightZ,
    RightS,
}

impl ShiftKind {
    pub(crate) fn to_string(&self) -> String {
        match self {
            ShiftKind::Left => "shl".to_string(),
            ShiftKind::RightZ => "shr".to_string(),
            ShiftKind::RightS => "sar".to_string(),
        }
    }
}

impl fmt::Debug for ShiftKind {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.to_string())
    }
}

/// These indicate condition code tests. Not all are represented, since not all are useful in
/// compiler-generated code.
#[derive(Copy, Clone)]
#[repr(u8)]
pub enum CC {
    /// overflow
    O = 0,
    /// no overflow
    NO = 1,

    /// < unsigned
    B = 2,
    /// >= unsigned
    NB = 3,

    /// zero
    Z = 4,
    /// not-zero
    NZ = 5,

    /// <= unsigned
    BE = 6,
    /// > unsigned
    NBE = 7,

    /// negative
    S = 8,
    /// not-negative
    NS = 9,

    /// < signed
    L = 12,
    /// >= signed
    NL = 13,

    /// <= signed
    LE = 14,
    /// > signed
    NLE = 15,
}

impl CC {
    pub(crate) fn to_string(&self) -> String {
        match self {
            CC::O => "o".to_string(),
            CC::NO => "no".to_string(),
            CC::B => "b".to_string(),
            CC::NB => "nb".to_string(),
            CC::Z => "z".to_string(),
            CC::NZ => "nz".to_string(),
            CC::BE => "be".to_string(),
            CC::NBE => "nbe".to_string(),
            CC::S => "s".to_string(),
            CC::NS => "ns".to_string(),
            CC::L => "l".to_string(),
            CC::NL => "nl".to_string(),
            CC::LE => "le".to_string(),
            CC::NLE => "nle".to_string(),
        }
    }

    pub(crate) fn invert(&self) -> CC {
        match self {
            CC::O => CC::NO,
            CC::NO => CC::O,

            CC::B => CC::NB,
            CC::NB => CC::B,

            CC::Z => CC::NZ,
            CC::NZ => CC::Z,

            CC::BE => CC::NBE,
            CC::NBE => CC::BE,

            CC::S => CC::NS,
            CC::NS => CC::S,

            CC::L => CC::NL,
            CC::NL => CC::L,

            CC::LE => CC::NLE,
            CC::NLE => CC::LE,
        }
    }

    pub(crate) fn get_enc(self) -> u8 {
        self as u8
    }
}
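
Note that the hardware encodings above come in complementary pairs (O/NO, B/NB, and so on), so inverting a condition code is just flipping the low bit of its encoding. A standalone check (not part of this commit):

```rust
// x86 condition-code encodings pair each test with its complement, so
// inversion is `enc ^ 1`.
fn invert_enc(enc: u8) -> u8 {
    enc ^ 1
}

fn main() {
    // Z (4) <-> NZ (5), NL (13) <-> L (12), matching the CC table above.
    assert_eq!(invert_enc(4), 5);
    assert_eq!(invert_enc(13), 12);
}
```
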

impl fmt::Debug for CC {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.to_string())
    }
}

/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
pub enum BranchTarget {
    /// An unresolved reference to a BlockIndex, as passed into
    /// `lower_branch_group()`.
    Block(BlockIndex),

    /// A resolved reference to another instruction, after
    /// `Inst::with_block_offsets()`. This offset is in bytes.
    ResolvedOffset(BlockIndex, isize),
}

impl ShowWithRRU for BranchTarget {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        match self {
            BranchTarget::Block(bix) => format!("(Block {})", bix),
            BranchTarget::ResolvedOffset(bix, offs) => format!("(Block {}, offset {})", bix, offs),
        }
    }
}

impl BranchTarget {
    /// Lower the branch target given offsets of each block.
    pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
        match self {
            &mut BranchTarget::Block(bix) => {
                let bix = bix as usize;
                assert!(bix < targets.len());
                let block_offset_in_func = targets[bix];
                let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
                *self = BranchTarget::ResolvedOffset(bix as BlockIndex, branch_offset);
            }
            &mut BranchTarget::ResolvedOffset(..) => {}
        }
    }

    /// Get the block index.
    pub fn as_block_index(&self) -> Option<BlockIndex> {
        match self {
            &BranchTarget::Block(bix) => Some(bix),
            _ => None,
        }
    }

    /// Get the offset as a signed 32-bit byte offset. This returns the
    /// offset in bytes between the first byte of the source and the first
    /// byte of the target. It does not take into account the Intel-specific
    /// rule that a branch offset is encoded as relative to the start of the
    /// following instruction. That is a problem for the emitter to deal
    /// with.
    pub fn as_offset_i32(&self) -> Option<i32> {
        match self {
            &BranchTarget::ResolvedOffset(_, off) => {
                // Leave a bit of slack so that the emitter is guaranteed to
                // be able to add the length of the jump instruction encoding
                // to this value and still have a value in signed-32 range.
                if off >= -0x7FFF_FF00isize && off <= 0x7FFF_FF00isize {
                    Some(off as i32)
                } else {
                    None
                }
            }
            _ => None,
        }
    }

    /// Map the block index given a transform map.
    pub fn map(&mut self, block_index_map: &[BlockIndex]) {
        match self {
            &mut BranchTarget::Block(ref mut bix) => {
                let n = block_index_map[*bix as usize];
                *bix = n;
            }
            _ => panic!("BranchTarget::map() called on already-lowered BranchTarget!"),
        }
    }
}
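
A standalone sketch (not part of this commit) of the "start of source insn" to "end of instruction" conversion the emitter performs on these resolved offsets: on x86, `jmp rel32` is 5 bytes long and `jcc rel32` is 6, and the rel32 field is relative to the end of the instruction.

```rust
// Convert an offset measured from the start of the branch instruction
// (as stored in BranchTarget::ResolvedOffset) into the rel32 field x86
// actually encodes.
fn rel32_field(offset_from_insn_start: i32, insn_len: i32) -> i32 {
    offset_from_insn_start - insn_len
}

fn main() {
    // A jmp to its own start encodes rel32 = -5.
    assert_eq!(rel32_field(0, 5), -5);
    // A jcc targeting the next instruction encodes rel32 = 0.
    assert_eq!(rel32_field(6, 6), 0);
}
```
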

cranelift/codegen/src/isa/x64/inst/emit.rs (new file, 888 lines)
@@ -0,0 +1,888 @@
use regalloc::{Reg, RegClass};

use crate::isa::x64::inst::*;

fn low8willSXto64(x: u32) -> bool {
    let xs = (x as i32) as i64;
    xs == ((xs << 56) >> 56)
}

fn low8willSXto32(x: u32) -> bool {
    let xs = x as i32;
    xs == ((xs << 24) >> 24)
}
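
These helpers test whether a value's low 8 bits, sign-extended, reproduce the whole value, i.e. whether an imm8 short-form encoding can stand in for an imm32. A standalone check (not part of this commit):

```rust
// Same shift-and-compare trick as low8willSXto32 above.
fn low8_will_sx_to_32(x: u32) -> bool {
    let xs = x as i32;
    xs == ((xs << 24) >> 24)
}

fn main() {
    assert!(low8_will_sx_to_32(127)); // 0x7F fits in simm8
    assert!(low8_will_sx_to_32(-128i32 as u32)); // 0xFFFF_FF80 fits too
    assert!(!low8_will_sx_to_32(128)); // 0x80 would sign-extend to -128
}
```
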

//=============================================================================
// Instructions and subcomponents: emission

// For all of the routines that take both a memory-or-reg operand (sometimes
// called "E" in the Intel documentation) and a reg-only operand ("G" in
// Intelese), the order is always G first, then E.
//
// "enc" in the following means "hardware register encoding number".

#[inline(always)]
fn mkModRegRM(m0d: u8, encRegG: u8, rmE: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(encRegG < 8);
    debug_assert!(rmE < 8);
    ((m0d & 3) << 6) | ((encRegG & 7) << 3) | (rmE & 7)
}

#[inline(always)]
fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(encIndex < 8);
    debug_assert!(encBase < 8);
    ((shift & 3) << 6) | ((encIndex & 7) << 3) | (encBase & 7)
}
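
Both helpers pack the same 2-3-3 bit layout: mod/reg/rm for ModRM and scale/index/base for SIB. A standalone illustration (not part of this commit):

```rust
// 2 bits in the top field, 3 in the middle, 3 in the bottom.
fn pack_233(hi2: u8, mid3: u8, lo3: u8) -> u8 {
    ((hi2 & 3) << 6) | ((mid3 & 7) << 3) | (lo3 & 7)
}

fn main() {
    // ModRM for register-direct (mod = 0b11), reg = 0 (rax), rm = 1 (rcx):
    assert_eq!(pack_233(0b11, 0, 1), 0xC1);
    // SIB for scale 8 (shift = 3), index = 1 (rcx), base = 0 (rax):
    assert_eq!(pack_233(3, 1, 0), 0xC8);
}
```
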

/// Get the encoding number from something which we sincerely hope is a real
/// register of class I64.
#[inline(always)]
fn iregEnc(reg: Reg) -> u8 {
    debug_assert!(reg.is_real());
    debug_assert!(reg.get_class() == RegClass::I64);
    reg.get_hw_encoding()
}

// F_*: these flags describe special handling of the insn to be generated. Be
// careful with these. It is easy to create nonsensical combinations.
const F_NONE: u32 = 0;

/// Emit the REX prefix byte even if it appears to be redundant (== 0x40).
const F_RETAIN_REDUNDANT_REX: u32 = 1;

/// Set the W bit in the REX prefix to zero. By default it will be set to 1,
/// indicating a 64-bit operation.
const F_CLEAR_REX_W: u32 = 2;

/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate
/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W.
const F_PREFIX_66: u32 = 4;

/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a register `encG` and a memory
/// address `memE`, create and emit: first the REX prefix, then caller-supplied
/// opcode byte(s) (`opcodes` and `numOpcodes`), then the MOD/RM byte, then
/// optionally a SIB byte, and finally optionally an immediate that will be
/// derived from the `memE` operand. For most instructions up to and including
/// SSE4.2, that will be the whole instruction.
///
/// The opcodes are written bigendianly for the convenience of callers. For
/// example, if the opcode bytes to be emitted are, in this order, F3 0F 27,
/// then the caller should pass `opcodes` == 0xF3_0F_27 and `numOpcodes` == 3.
///
/// The register operand is represented here not as a `Reg` but as its hardware
/// encoding, `encG`. `flags` can specify special handling for the REX prefix.
/// By default, the REX prefix will indicate a 64-bit operation and will be
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
/// REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE<O: MachSectionOutput>(
    sink: &mut O,
    opcodes: u32,
    mut numOpcodes: usize,
    encG: u8,
    memE: &Addr,
    flags: u32,
) {
    // General comment for this function: the registers in `memE` must be
    // 64-bit integer registers, because they are part of an address
    // expression. But `encG` can be derived from a register of any class.
    let prefix66 = (flags & F_PREFIX_66) != 0;
    let clearRexW = (flags & F_CLEAR_REX_W) != 0;
    let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
    // The operand-size override, if requested. This indicates a 16-bit
    // operation.
    if prefix66 {
        sink.put1(0x66);
    }
    match memE {
        Addr::IR { simm32, base: regE } => {
            // First, cook up the REX byte. This is easy.
            let encE = iregEnc(*regE);
            let w = if clearRexW { 0 } else { 1 };
            let r = (encG >> 3) & 1;
            let x = 0;
            let b = (encE >> 3) & 1;
            let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
            if rex != 0x40 || retainRedundant {
                sink.put1(rex);
            }
            // Now the opcode(s). These include any other prefixes the caller
            // hands to us.
            while numOpcodes > 0 {
                numOpcodes -= 1;
                sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
            }
            // Now the mod/rm and associated immediates. This is
            // significantly complicated due to the multiple special cases.
            if *simm32 == 0
                && encE != regs::ENC_RSP
                && encE != regs::ENC_RBP
                && encE != regs::ENC_R12
                && encE != regs::ENC_R13
            {
                // FIXME JRS 2020Feb11: those four tests can surely be
                // replaced by a single mask-and-compare check. We should do
                // that because this routine is likely to be hot.
                sink.put1(mkModRegRM(0, encG & 7, encE & 7));
            } else if *simm32 == 0 && (encE == regs::ENC_RSP || encE == regs::ENC_R12) {
                sink.put1(mkModRegRM(0, encG & 7, 4));
                sink.put1(0x24);
            } else if low8willSXto32(*simm32) && encE != regs::ENC_RSP && encE != regs::ENC_R12 {
                sink.put1(mkModRegRM(1, encG & 7, encE & 7));
                sink.put1((simm32 & 0xFF) as u8);
            } else if encE != regs::ENC_RSP && encE != regs::ENC_R12 {
                sink.put1(mkModRegRM(2, encG & 7, encE & 7));
                sink.put4(*simm32);
            } else if (encE == regs::ENC_RSP || encE == regs::ENC_R12) && low8willSXto32(*simm32) {
                // REX.B distinguishes RSP from R12.
                sink.put1(mkModRegRM(1, encG & 7, 4));
                sink.put1(0x24);
                sink.put1((simm32 & 0xFF) as u8);
            } else if encE == regs::ENC_R12 || encE == regs::ENC_RSP {
                // .. wait for test case for RSP case
                // REX.B distinguishes RSP from R12.
                sink.put1(mkModRegRM(2, encG & 7, 4));
                sink.put1(0x24);
                sink.put4(*simm32);
            } else {
                unreachable!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IR");
            }
        }
        // Bizarrely, the IRRS case is much simpler.
        Addr::IRRS {
            simm32,
            base: regBase,
            index: regIndex,
            shift,
        } => {
            let encBase = iregEnc(*regBase);
            let encIndex = iregEnc(*regIndex);
            // The REX byte.
            let w = if clearRexW { 0 } else { 1 };
            let r = (encG >> 3) & 1;
            let x = (encIndex >> 3) & 1;
            let b = (encBase >> 3) & 1;
            let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
            if rex != 0x40 || retainRedundant {
                sink.put1(rex);
            }
            // All other prefixes and opcodes.
            while numOpcodes > 0 {
                numOpcodes -= 1;
                sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
            }
            // ModRM, SIB, immediates.
            if low8willSXto32(*simm32) && encIndex != regs::ENC_RSP {
                sink.put1(mkModRegRM(1, encG & 7, 4));
                sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
                sink.put1(*simm32 as u8);
            } else if encIndex != regs::ENC_RSP {
                sink.put1(mkModRegRM(2, encG & 7, 4));
                sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
                sink.put4(*simm32);
            } else {
                panic!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IRRS");
            }
        }
    }
}
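
A standalone illustration (not part of this commit) of the REX byte both arms of this function construct: the layout is 0100WRXB, where W selects 64-bit operand size and R/X/B provide the fourth bit of the reg, index, and base/rm encodings.

```rust
fn rex(w: u8, r: u8, x: u8, b: u8) -> u8 {
    0x40 | ((w & 1) << 3) | ((r & 1) << 2) | ((x & 1) << 1) | (b & 1)
}

fn main() {
    assert_eq!(rex(1, 0, 0, 0), 0x48); // the classic REX.W prefix
    assert_eq!(rex(0, 0, 0, 0), 0x40); // "redundant" REX, normally omitted
    assert_eq!(rex(1, 1, 0, 1), 0x4D); // 64-bit op, reg and rm both in r8..r15
}
```
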

/// This is the core 'emit' function for instructions that do not reference
/// memory.
///
/// This is conceptually the same as
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
/// where the E operand is a register rather than memory. Hence it is much
/// simpler.
fn emit_REX_OPCODES_MODRM_encG_encE<O: MachSectionOutput>(
    sink: &mut O,
    opcodes: u32,
    mut numOpcodes: usize,
    encG: u8,
    encE: u8,
    flags: u32,
) {
    // EncG and EncE can be derived from registers of any class, and they
    // don't even have to be from the same class. For example, for an
    // integer-to-FP conversion insn, one might be RegClass::I64 and the other
    // RegClass::V128.
    let prefix66 = (flags & F_PREFIX_66) != 0;
    let clearRexW = (flags & F_CLEAR_REX_W) != 0;
    let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
    // The operand-size override.
    if prefix66 {
        sink.put1(0x66);
    }
    // The REX byte.
    let w = if clearRexW { 0 } else { 1 };
    let r = (encG >> 3) & 1;
    let x = 0;
    let b = (encE >> 3) & 1;
    let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
    if rex != 0x40 || retainRedundant {
        sink.put1(rex);
    }
    // All other prefixes and opcodes.
    while numOpcodes > 0 {
        numOpcodes -= 1;
        sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
    }
    // Now the mod/rm byte. The instruction we're generating doesn't access
    // memory, so there is no SIB byte or immediate -- we're done.
    sink.put1(mkModRegRM(3, encG & 7, encE & 7));
}

// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.

fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE<O: MachSectionOutput>(
    sink: &mut O,
    opcodes: u32,
    numOpcodes: usize,
    regG: Reg,
    memE: &Addr,
    flags: u32,
) {
    // JRS FIXME 2020Feb07: this should really just be `regEnc`, not `iregEnc`.
    let encG = iregEnc(regG);
    emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
}

fn emit_REX_OPCODES_MODRM_regG_regE<O: MachSectionOutput>(
    sink: &mut O,
    opcodes: u32,
    numOpcodes: usize,
    regG: Reg,
    regE: Reg,
    flags: u32,
) {
    // JRS FIXME 2020Feb07: these should really just be `regEnc`, not `iregEnc`.
    let encG = iregEnc(regG);
    let encE = iregEnc(regE);
    emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags);
}

/// Write a suitable number of bits from an imm64 to the sink.
fn emit_simm<O: MachSectionOutput>(sink: &mut O, size: u8, simm32: u32) {
    match size {
        8 | 4 => sink.put4(simm32),
        2 => sink.put2(simm32 as u16),
        1 => sink.put1(simm32 as u8),
        _ => panic!("x64::Inst::emit_simm: unreachable"),
    }
}

/// The top-level emit function.
///
/// Important! Do not add improved (shortened) encoding cases to existing
/// instructions without also adding tests for those improved encodings. That
/// is a dangerous game that leads to hard-to-track-down errors in the emitted
/// code.
///
/// For all instructions, make sure to have test coverage for all of the
/// following situations. Do this by creating the cross product resulting from
/// applying the following rules to each operand:
///
/// (1) for any insn that mentions a register: one test using a register from
///     the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
///     using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
///     This helps detect incorrect REX prefix construction.
///
/// (2) for any insn that mentions a byte register: one test for each of the
///     four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
///     [r8b .. r11b] and [r12b .. r15b]. This checks that
///     apparently-redundant REX prefixes are retained when required.
///
/// (3) for any insn that contains an immediate field, check the following
///     cases: field is zero, field is in simm8 range (-128 .. 127), field is
///     in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
///     instructions that require a 32-bit immediate have a short-form encoding
///     when the imm is in simm8 range.
///
/// Rules (1), (2) and (3) don't apply for registers within address expressions
/// (`Addr`s). Those are already pretty well tested, and the registers in them
/// don't have any effect on the containing instruction (apart from possibly
/// requiring REX prefix bits).
///
/// When choosing registers for a test, avoid using registers with the same
/// offset within a given group. For example, don't use rax and r8, since they
/// both have the lowest 3 bits as 000, and so the test won't detect errors
/// where those 3-bit register sub-fields are confused by the emitter. Instead
/// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl
/// and bpl since they have the same offset in their group; use instead (eg) cl
/// and sil.
///
/// For all instructions, also add a test that uses only low-half registers
/// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX
/// prefixes are correctly omitted. This low-half restriction must apply to
/// _all_ registers in the insn, even those in address expressions.
///
/// Following these rules creates large numbers of test cases, but it's the
/// only way to make the emitter reliable.
///
/// Known possible improvements:
///
/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
///   care?)
pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) {
    match inst {
        Inst::Nop { len: 0 } => {}
        Inst::Alu_RMI_R {
            is_64,
            op,
            src: srcE,
            dst: regG,
        } => {
            let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
            if *op == RMI_R_Op::Mul {
                // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
                // we have to special-case it.
                match srcE {
                    RMI::R { reg: regE } => {
                        emit_REX_OPCODES_MODRM_regG_regE(
                            sink,
                            0x0FAF,
                            2,
                            regG.to_reg(),
                            *regE,
                            flags,
                        );
                    }
                    RMI::M { addr } => {
                        emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                            sink,
                            0x0FAF,
                            2,
                            regG.to_reg(),
                            addr,
                            flags,
                        );
                    }
                    RMI::I { simm32 } => {
                        let useImm8 = low8willSXto32(*simm32);
                        let opcode = if useImm8 { 0x6B } else { 0x69 };
                        // Yes, really, regG twice.
                        emit_REX_OPCODES_MODRM_regG_regE(
                            sink,
                            opcode,
                            1,
                            regG.to_reg(),
                            regG.to_reg(),
                            flags,
                        );
                        emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
                    }
                }
            } else {
                let (opcode_R, opcode_M, subopcode_I) = match op {
                    RMI_R_Op::Add => (0x01, 0x03, 0),
                    RMI_R_Op::Sub => (0x29, 0x2B, 5),
                    RMI_R_Op::And => (0x21, 0x23, 4),
                    RMI_R_Op::Or => (0x09, 0x0B, 1),
                    RMI_R_Op::Xor => (0x31, 0x33, 6),
                    RMI_R_Op::Mul => panic!("unreachable"),
                };
                match srcE {
                    RMI::R { reg: regE } => {
                        // Note. The arguments .. regE .. regG .. sequence
                        // here is the opposite of what is expected. I'm not
                        // sure why this is. But I am fairly sure that the
                        // arg order could be switched back to the expected
                        // .. regG .. regE .. if opcode_rr is also switched
                        // over to the "other" basic integer opcode (viz, the
                        // R/RM vs RM/R duality). However, that would mean
                        // that the test results won't be in accordance with
                        // the GNU as reference output. In other words, the
                        // inversion exists as a result of using GNU as as a
                        // gold standard.
                        emit_REX_OPCODES_MODRM_regG_regE(
                            sink,
                            opcode_R,
                            1,
                            *regE,
                            regG.to_reg(),
                            flags,
                        );
                        // NB: if this is ever extended to handle byte size
                        // ops, be sure to retain redundant REX prefixes.
                    }
                    RMI::M { addr } => {
                        // Whereas here we revert to the "normal" G-E ordering.
                        emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                            sink,
                            opcode_M,
                            1,
                            regG.to_reg(),
                            addr,
                            flags,
                        );
                    }
                    RMI::I { simm32 } => {
                        let useImm8 = low8willSXto32(*simm32);
                        let opcode = if useImm8 { 0x83 } else { 0x81 };
                        // And also here we use the "normal" G-E ordering.
                        let encG = iregEnc(regG.to_reg());
                        emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags);
                        emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
                    }
                }
            }
        }
        Inst::Imm_R {
            dst_is_64,
            simm64,
            dst,
        } => {
            let encDst = iregEnc(dst.to_reg());
            if *dst_is_64 {
                // FIXME JRS 2020Feb10: also use the 32-bit case here when
                // possible.
                sink.put1(0x48 | ((encDst >> 3) & 1));
                sink.put1(0xB8 | (encDst & 7));
                sink.put8(*simm64);
            } else {
                if ((encDst >> 3) & 1) == 1 {
                    sink.put1(0x41);
                }
                sink.put1(0xB8 | (encDst & 7));
                sink.put4(*simm64 as u32);
            }
        }
        Inst::Mov_R_R { is_64, src, dst } => {
            let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
            emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags);
        }
        Inst::MovZX_M_R { extMode, addr, dst } => {
            match extMode {
                ExtMode::BL => {
                    // MOVZBL is (REX.W==0) 0F B6 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FB6,
                        2,
                        dst.to_reg(),
                        addr,
                        F_CLEAR_REX_W,
                    )
                }
                ExtMode::BQ => {
                    // MOVZBQ is (REX.W==1) 0F B6 /r
                    // I'm not sure why the Intel manual offers different
                    // encodings for MOVZBQ than for MOVZBL. AIUI they should
                    // achieve the same, since MOVZBL is just going to zero out
                    // the upper half of the destination anyway.
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FB6,
                        2,
                        dst.to_reg(),
                        addr,
                        F_NONE,
                    )
                }
                ExtMode::WL => {
                    // MOVZWL is (REX.W==0) 0F B7 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FB7,
                        2,
                        dst.to_reg(),
                        addr,
                        F_CLEAR_REX_W,
                    )
                }
                ExtMode::WQ => {
                    // MOVZWQ is (REX.W==1) 0F B7 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FB7,
                        2,
                        dst.to_reg(),
                        addr,
                        F_NONE,
                    )
                }
                ExtMode::LQ => {
                    // This is just a standard 32 bit load, and we rely on the
                    // default zero-extension rule to perform the extension.
                    // MOV r/m32, r32 is (REX.W==0) 8B /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x8B,
                        1,
                        dst.to_reg(),
                        addr,
                        F_CLEAR_REX_W,
                    )
                }
            }
        }
        Inst::Mov64_M_R { addr, dst } => {
            emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE)
        }
        Inst::MovSX_M_R { extMode, addr, dst } => {
            match extMode {
                ExtMode::BL => {
                    // MOVSBL is (REX.W==0) 0F BE /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FBE,
                        2,
                        dst.to_reg(),
                        addr,
                        F_CLEAR_REX_W,
                    )
                }
                ExtMode::BQ => {
                    // MOVSBQ is (REX.W==1) 0F BE /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FBE,
                        2,
                        dst.to_reg(),
                        addr,
                        F_NONE,
                    )
                }
                ExtMode::WL => {
                    // MOVSWL is (REX.W==0) 0F BF /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FBF,
                        2,
                        dst.to_reg(),
                        addr,
                        F_CLEAR_REX_W,
                    )
                }
                ExtMode::WQ => {
                    // MOVSWQ is (REX.W==1) 0F BF /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x0FBF,
                        2,
                        dst.to_reg(),
                        addr,
                        F_NONE,
                    )
                }
                ExtMode::LQ => {
                    // MOVSLQ is (REX.W==1) 63 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x63,
                        1,
                        dst.to_reg(),
                        addr,
                        F_NONE,
                    )
                }
            }
        }
        Inst::Mov_R_M { size, src, addr } => {
            match size {
                1 => {
                    // This is one of the few places where the presence of a
                    // redundant REX prefix changes the meaning of the
                    // instruction.
                    let encSrc = iregEnc(*src);
                    let retainRedundantRex = if encSrc >= 4 && encSrc <= 7 {
                        F_RETAIN_REDUNDANT_REX
                    } else {
                        0
                    };
                    // MOV r8, r/m8 is (REX.W==0) 88 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x88,
                        1,
                        *src,
                        addr,
                        F_CLEAR_REX_W | retainRedundantRex,
                    )
                }
                2 => {
                    // MOV r16, r/m16 is 66 (REX.W==0) 89 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x89,
                        1,
                        *src,
                        addr,
                        F_CLEAR_REX_W | F_PREFIX_66,
                    )
                }
                4 => {
                    // MOV r32, r/m32 is (REX.W==0) 89 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
                        sink,
                        0x89,
                        1,
                        *src,
                        addr,
                        F_CLEAR_REX_W,
                    )
                }
                8 => {
                    // MOV r64, r/m64 is (REX.W==1) 89 /r
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE)
                }
                _ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
            }
        }
        Inst::Shift_R {
            is_64,
            kind,
            num_bits,
            dst,
        } => {
            let encDst = iregEnc(dst.to_reg());
            let subopcode = match kind {
                ShiftKind::Left => 4,
                ShiftKind::RightZ => 5,
                ShiftKind::RightS => 7,
            };
            match num_bits {
                None => {
                    // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
                    // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
                    emit_REX_OPCODES_MODRM_encG_encE(
                        sink,
                        0xD3,
                        1,
                        subopcode,
                        encDst,
                        if *is_64 { F_NONE } else { F_CLEAR_REX_W },
                    );
                }
                Some(num_bits) => {
                    // SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
                    // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
                    // When the shift amount is 1, there's an even shorter encoding, but we don't
                    // bother with that nicety here.
                    emit_REX_OPCODES_MODRM_encG_encE(
                        sink,
                        0xC1,
                        1,
                        subopcode,
                        encDst,
                        if *is_64 { F_NONE } else { F_CLEAR_REX_W },
                    );
                    sink.put1(*num_bits);
                }
            }
        }
        Inst::Cmp_RMI_R {
            size,
            src: srcE,
            dst: regG,
        } => {
            let mut retainRedundantRex = 0;
            if *size == 1 {
                // Here, a redundant REX prefix changes the meaning of the
                // instruction.
                let encG = iregEnc(*regG);
                if encG >= 4 && encG <= 7 {
                    retainRedundantRex = F_RETAIN_REDUNDANT_REX;
                }
            }
            let mut flags = match size {
                8 => F_NONE,
                4 => F_CLEAR_REX_W,
                2 => F_CLEAR_REX_W | F_PREFIX_66,
                1 => F_CLEAR_REX_W | retainRedundantRex,
                _ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
            };
            match srcE {
                RMI::R { reg: regE } => {
                    let opcode = if *size == 1 { 0x38 } else { 0x39 };
                    if *size == 1 {
                        // We also need to check whether the E register forces
                        // the use of a redundant REX.
                        let encE = iregEnc(*regE);
                        if encE >= 4 && encE <= 7 {
                            flags |= F_RETAIN_REDUNDANT_REX;
                        }
                    }
                    // Same comment re swapped args as for Alu_RMI_R.
                    emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags);
                }
                RMI::M { addr } => {
                    let opcode = if *size == 1 { 0x3A } else { 0x3B };
                    // Whereas here we revert to the "normal" G-E ordering.
                    emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags);
                }
                RMI::I { simm32 } => {
                    // FIXME JRS 2020Feb11: there are shorter encodings for
                    // cmp $imm, rax/eax/ax/al.
                    let useImm8 = low8willSXto32(*simm32);
                    let opcode = if *size == 1 {
                        0x80
                    } else if useImm8 {
                        0x83
                    } else {
                        0x81
                    };
                    // And also here we use the "normal" G-E ordering.
                    let encG = iregEnc(*regG);
                    emit_REX_OPCODES_MODRM_encG_encE(
                        sink, opcode, 1, 7, /* subopcode */
                        encG, flags,
                    );
                    emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32);
                }
            }
        }
        Inst::Push64 { src } => {
            match src {
                RMI::R { reg } => {
                    let encReg = iregEnc(*reg);
                    let rex = 0x40 | ((encReg >> 3) & 1);
                    if rex != 0x40 {
                        sink.put1(rex);
                    }
                    sink.put1(0x50 | (encReg & 7));
                }
                RMI::M { addr } => {
                    emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
                        sink,
                        0xFF,
                        1,
                        6, /* subopcode */
                        addr,
                        F_CLEAR_REX_W,
                    );
                }
                RMI::I { simm32 } => {
                    if low8willSXto64(*simm32) {
                        sink.put1(0x6A);
                        sink.put1(*simm32 as u8);
                    } else {
                        sink.put1(0x68);
                        sink.put4(*simm32);
                    }
                }
            }
        }
        Inst::Pop64 { dst } => {
            let encDst = iregEnc(dst.to_reg());
            if encDst >= 8 {
                // 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant
                // here.
                sink.put1(0x41);
            }
            sink.put1(0x58 + (encDst & 7));
        }
        //
        // ** Inst::CallKnown
        //
        Inst::CallUnknown { dest } => {
            match dest {
                RM::R { reg } => {
                    let regEnc = iregEnc(*reg);
                    emit_REX_OPCODES_MODRM_encG_encE(
                        sink,
                        0xFF,
                        1,
                        2, /* subopcode */
                        regEnc,
                        F_CLEAR_REX_W,
                    );
                }
                RM::M { addr } => {
                    emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
                        sink,
                        0xFF,
                        1,
                        2, /* subopcode */
                        addr,
                        F_CLEAR_REX_W,
                    );
                }
            }
        }
        Inst::Ret {} => sink.put1(0xC3),

        Inst::JmpKnown {
            dest: BranchTarget::Block(..),
        } => {
            // This case occurs only during computation of block offsets/sizes;
            // only the five-byte size matters, so emit five zero bytes.
            sink.put1(0);
            sink.put4(0);
        }
        Inst::JmpKnown {
            dest: BranchTarget::ResolvedOffset(_bix, offset),
        } if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
            // And now for real.
            let mut offs_i32 = *offset as i32;
            offs_i32 -= 5;
            let offs_u32 = offs_i32 as u32;
            sink.put1(0xE9);
            sink.put4(offs_u32);
        }
        //
        // ** Inst::JmpCondSymm XXXX should never happen
        //
        Inst::JmpCond {
            cc: _,
            target: BranchTarget::Block(..),
        } => {
            // This case occurs when we are computing block offsets/sizes,
            // prior to lowering block-index targets to concrete-offset targets.
            // Only the size matters, so let's emit 6 bytes, as below.
            sink.put1(0);
            sink.put1(0);
            sink.put4(0);
        }
        Inst::JmpCond {
            cc,
            target: BranchTarget::ResolvedOffset(_bix, offset),
        } if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
            // This insn is 6 bytes long. Currently `offset` is relative to
            // the start of this insn, but the Intel encoding requires it to
            // be relative to the start of the next instruction. Hence the
            // adjustment.
            let mut offs_i32 = *offset as i32;
            offs_i32 -= 6;
            let offs_u32 = offs_i32 as u32;
            sink.put1(0x0F);
            sink.put1(0x80 + cc.get_enc());
            sink.put4(offs_u32);
        }
        //
        // ** Inst::JmpCondCompound XXXX should never happen
        //
        Inst::JmpUnknown { target } => {
            match target {
                RM::R { reg } => {
                    let regEnc = iregEnc(*reg);
                    emit_REX_OPCODES_MODRM_encG_encE(
                        sink,
                        0xFF,
                        1,
                        4, /* subopcode */
                        regEnc,
                        F_CLEAR_REX_W,
                    );
                }
                RM::M { addr } => {
                    emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
                        sink,
                        0xFF,
                        1,
                        4, /* subopcode */
                        addr,
                        F_CLEAR_REX_W,
                    );
                }
            }
        }

        _ => panic!("x64_emit: unhandled: {}", inst.show_rru(None)),
    }
}
|
||||
2199
cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Normal file
File diff suppressed because it is too large
956
cranelift/codegen/src/isa/x64/inst/mod.rs
Normal file
@@ -0,0 +1,956 @@
//! This module defines x86_64-specific machine instruction types.

#![allow(dead_code)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

use std::fmt;
use std::string::{String, ToString};

use regalloc::RegUsageCollector;
use regalloc::Set;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};

use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::ExternalName;
use crate::ir::Type;
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};

pub mod args;
mod emit;
#[cfg(test)]
mod emit_tests;
pub mod regs;

use args::*;
use regs::{create_reg_universe_systemv, show_ireg_sized};

//=============================================================================
// Instructions (top level): definition

// Don't build these directly. Instead use the Inst:: functions to create them.

/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
#[derive(Clone)]
pub(crate) enum Inst {
    /// nops of various sizes, including zero
    Nop { len: u8 },

    /// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
    Alu_RMI_R {
        is_64: bool,
        op: RMI_R_Op,
        src: RMI,
        dst: Writable<Reg>,
    },

    /// (imm32 imm64) reg.
    /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg64
    Imm_R {
        dst_is_64: bool,
        simm64: u64,
        dst: Writable<Reg>,
    },

    /// mov (64 32) reg reg
    Mov_R_R {
        is_64: bool,
        src: Reg,
        dst: Writable<Reg>,
    },

    /// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64).
    /// Note that the lq variant doesn't really exist since the default
    /// zero-extend rule makes it unnecessary. For that case we emit the
    /// equivalent "movl AM, reg32".
    MovZX_M_R {
        extMode: ExtMode,
        addr: Addr,
        dst: Writable<Reg>,
    },

    /// A plain 64-bit integer load, since MovZX_M_R can't represent that
    Mov64_M_R { addr: Addr, dst: Writable<Reg> },

    /// movs (bl bq wl wq lq) addr reg (good for all SX loads)
    MovSX_M_R {
        extMode: ExtMode,
        addr: Addr,
        dst: Writable<Reg>,
    },

    /// mov (b w l q) reg addr (good for all integer stores)
    Mov_R_M {
        size: u8, // 1, 2, 4 or 8
        src: Reg,
        addr: Addr,
    },

    /// (shl shr sar) (l q) imm reg
    Shift_R {
        is_64: bool,
        kind: ShiftKind,
        /// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
        num_bits: Option<u8>,
        dst: Writable<Reg>,
    },

    /// cmp (b w l q) (reg addr imm) reg
    Cmp_RMI_R {
        size: u8, // 1, 2, 4 or 8
        src: RMI,
        dst: Reg,
    },

    /// pushq (reg addr imm)
    Push64 { src: RMI },

    /// popq reg
    Pop64 { dst: Writable<Reg> },

    /// call simm32
    CallKnown {
        dest: ExternalName,
        uses: Set<Reg>,
        defs: Set<Writable<Reg>>,
    },

    /// callq (reg mem)
    CallUnknown {
        dest: RM,
        //uses: Set<Reg>,
        //defs: Set<Writable<Reg>>,
    },

    // ---- branches (exactly one must appear at end of BB) ----
    /// ret
    Ret,

    /// A placeholder instruction, generating no code, meaning that a function epilogue must be
    /// inserted there.
    EpiloguePlaceholder,

    /// jmp simm32
    JmpKnown { dest: BranchTarget },

    /// jcond cond target target
    // Symmetrical two-way conditional branch.
    // Should never reach the emitter.
    JmpCondSymm {
        cc: CC,
        taken: BranchTarget,
        not_taken: BranchTarget,
    },

    /// Lowered conditional branch: contains the original instruction, and a
    /// flag indicating whether to invert the taken-condition or not. Only one
    /// BranchTarget is retained, and the other is implicitly the next
    /// instruction, given the final basic-block layout.
    JmpCond {
        cc: CC,
        //inverted: bool, is this needed?
        target: BranchTarget,
    },

    /// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two
    /// actual machine instructions). Needed when the final block layout implies
    /// that neither arm of a conditional branch targets the fallthrough block.
    // Should never reach the emitter.
    JmpCondCompound {
        cc: CC,
        taken: BranchTarget,
        not_taken: BranchTarget,
    },

    /// jmpq (reg mem)
    JmpUnknown { target: RM },
}

// Handy constructors for Insts.

// For various sizes, will some number of lowest bits sign extend to be the
// same as the whole value?
pub(crate) fn low32willSXto64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}
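// A quick sanity check of the predicate on example values (added for
// illustration; not part of the original commit):
#[cfg(test)]
#[test]
fn test_low32willSXto64() {
    // Top bit of the low 32 bits is clear: sign-extends to itself.
    assert!(low32willSXto64(0x7FFF_FFFF));
    // Would sign-extend to 0xFFFF_FFFF_8000_0000, a different value.
    assert!(!low32willSXto64(0x8000_0000));
    // Already the 64-bit sign-extension of -2^31.
    assert!(low32willSXto64(0xFFFF_FFFF_8000_0000));
}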

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        debug_assert!(len <= 16);
        Self::Nop { len }
    }

    pub(crate) fn alu_rmi_r(is_64: bool, op: RMI_R_Op, src: RMI, dst: Writable<Reg>) -> Self {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Self::Alu_RMI_R {
            is_64,
            op,
            src,
            dst,
        }
    }

    pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        if !dst_is_64 {
            debug_assert!(low32willSXto64(simm64));
        }
        Inst::Imm_R {
            dst_is_64,
            simm64,
            dst,
        }
    }

    pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
        debug_assert!(src.get_class() == RegClass::I64);
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Mov_R_R { is_64, src, dst }
    }

    pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovZX_M_R { extMode, addr, dst }
    }

    pub(crate) fn mov64_m_r(addr: Addr, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Mov64_M_R { addr, dst }
    }

    pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovSX_M_R { extMode, addr, dst }
    }

    pub(crate) fn mov_r_m(
        size: u8, // 1, 2, 4 or 8
        src: Reg,
        addr: Addr,
    ) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        debug_assert!(src.get_class() == RegClass::I64);
        Inst::Mov_R_M { size, src, addr }
    }

    pub(crate) fn shift_r(
        is_64: bool,
        kind: ShiftKind,
        num_bits: Option<u8>,
        dst: Writable<Reg>,
    ) -> Inst {
        debug_assert!(if let Some(num_bits) = num_bits {
            num_bits < if is_64 { 64 } else { 32 }
        } else {
            true
        });
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Shift_R {
            is_64,
            kind,
            num_bits,
            dst,
        }
    }

    pub(crate) fn cmp_rmi_r(
        size: u8, // 1, 2, 4 or 8
        src: RMI,
        dst: Reg,
    ) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        debug_assert!(dst.get_class() == RegClass::I64);
        Inst::Cmp_RMI_R { size, src, dst }
    }

    pub(crate) fn push64(src: RMI) -> Inst {
        Inst::Push64 { src }
    }

    pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
        Inst::Pop64 { dst }
    }

    pub(crate) fn call_unknown(dest: RM) -> Inst {
        Inst::CallUnknown { dest }
    }

    pub(crate) fn ret() -> Inst {
        Inst::Ret
    }

    pub(crate) fn epilogue_placeholder() -> Inst {
        Inst::EpiloguePlaceholder
    }

    pub(crate) fn jmp_known(dest: BranchTarget) -> Inst {
        Inst::JmpKnown { dest }
    }

    pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
        Inst::JmpCondSymm {
            cc,
            taken,
            not_taken,
        }
    }

    pub(crate) fn jmp_cond(cc: CC, target: BranchTarget) -> Inst {
        Inst::JmpCond { cc, target }
    }

    pub(crate) fn jmp_cond_compound(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
        Inst::JmpCondCompound {
            cc,
            taken,
            not_taken,
        }
    }

    pub(crate) fn jmp_unknown(target: RM) -> Inst {
        Inst::JmpUnknown { target }
    }
}

//=============================================================================
// Instructions: printing

impl ShowWithRRU for Inst {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        fn suffixLQ(is_64: bool) -> String {
            (if is_64 { "q" } else { "l" }).to_string()
        }

        fn sizeLQ(is_64: bool) -> u8 {
            if is_64 {
                8
            } else {
                4
            }
        }

        fn suffixBWLQ(size: u8) -> String {
            match size {
                1 => "b".to_string(),
                2 => "w".to_string(),
                4 => "l".to_string(),
                8 => "q".to_string(),
                _ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
            }
        }

        match self {
            Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
            Inst::Alu_RMI_R {
                is_64,
                op,
                src,
                dst,
            } => format!(
                "{} {}, {}",
                ljustify2(op.to_string(), suffixLQ(*is_64)),
                src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
                show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
            ),
            Inst::Imm_R {
                dst_is_64,
                simm64,
                dst,
            } => {
                if *dst_is_64 {
                    format!(
                        "{} ${}, {}",
                        ljustify("movabsq".to_string()),
                        *simm64 as i64,
                        show_ireg_sized(dst.to_reg(), mb_rru, 8)
                    )
                } else {
                    format!(
                        "{} ${}, {}",
                        ljustify("movl".to_string()),
                        (*simm64 as u32) as i32,
                        show_ireg_sized(dst.to_reg(), mb_rru, 4)
                    )
                }
            }
            Inst::Mov_R_R { is_64, src, dst } => format!(
                "{} {}, {}",
                ljustify2("mov".to_string(), suffixLQ(*is_64)),
                show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
                show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
            ),
            Inst::MovZX_M_R { extMode, addr, dst } => {
                if *extMode == ExtMode::LQ {
                    format!(
                        "{} {}, {}",
                        ljustify("movl".to_string()),
                        addr.show_rru(mb_rru),
                        show_ireg_sized(dst.to_reg(), mb_rru, 4)
                    )
                } else {
                    format!(
                        "{} {}, {}",
                        ljustify2("movz".to_string(), extMode.to_string()),
                        addr.show_rru(mb_rru),
                        show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
                    )
                }
            }
            Inst::Mov64_M_R { addr, dst } => format!(
                "{} {}, {}",
                ljustify("movq".to_string()),
                addr.show_rru(mb_rru),
                dst.show_rru(mb_rru)
            ),
            Inst::MovSX_M_R { extMode, addr, dst } => format!(
                "{} {}, {}",
                ljustify2("movs".to_string(), extMode.to_string()),
                addr.show_rru(mb_rru),
                show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
            ),
            Inst::Mov_R_M { size, src, addr } => format!(
                "{} {}, {}",
                ljustify2("mov".to_string(), suffixBWLQ(*size)),
                show_ireg_sized(*src, mb_rru, *size),
                addr.show_rru(mb_rru)
            ),
            Inst::Shift_R {
                is_64,
                kind,
                num_bits,
                dst,
            } => match num_bits {
                None => format!(
                    "{} %cl, {}",
                    ljustify2(kind.to_string(), suffixLQ(*is_64)),
                    show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
                ),

                Some(num_bits) => format!(
                    "{} ${}, {}",
                    ljustify2(kind.to_string(), suffixLQ(*is_64)),
                    num_bits,
                    show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
                ),
            },
            Inst::Cmp_RMI_R { size, src, dst } => format!(
                "{} {}, {}",
                ljustify2("cmp".to_string(), suffixBWLQ(*size)),
                src.show_rru_sized(mb_rru, *size),
                show_ireg_sized(*dst, mb_rru, *size)
            ),
            Inst::Push64 { src } => {
                format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
            }
            Inst::Pop64 { dst } => {
                format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
            }
            //Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target),
            Inst::CallKnown { .. } => "**CallKnown**".to_string(),
            Inst::CallUnknown { dest } => format!(
                "{} *{}",
                ljustify("call".to_string()),
                dest.show_rru(mb_rru)
            ),
            Inst::Ret => "ret".to_string(),
            Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
            Inst::JmpKnown { dest } => {
                format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru))
            }
            Inst::JmpCondSymm {
                cc,
                taken,
                not_taken,
            } => format!(
                "{} taken={} not_taken={}",
                ljustify2("j".to_string(), cc.to_string()),
                taken.show_rru(mb_rru),
                not_taken.show_rru(mb_rru)
            ),
            //
            Inst::JmpCond { cc, ref target } => format!(
                "{} {}",
                ljustify2("j".to_string(), cc.to_string()),
                target.show_rru(None)
            ),
            //
            Inst::JmpCondCompound { .. } => "**JmpCondCompound**".to_string(),
            Inst::JmpUnknown { target } => format!(
                "{} *{}",
                ljustify("jmp".to_string()),
                target.show_rru(mb_rru)
            ),
        }
    }
}

// Temp hook for legacy printing machinery
impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        // Print the insn without a Universe :-(
        write!(fmt, "{}", self.show_rru(None))
    }
}

fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
    // This is a bit subtle. If some register is in the modified set, then it may not be in either
    // the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
    // regalloc.rs will "fix" this for us by removing the modified set from the use and def
    // sets.
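    // A hedged illustration (not from the original source): for
    // `Alu_RMI_R { op: Add, src: RMI::R { reg: %rax }, dst: %rbx }`, i.e.
    // `addq %rax, %rbx`, %rax is a use while %rbx is a mod, since the add
    // both reads and writes its destination.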
    match inst {
        // ** Nop
        Inst::Alu_RMI_R {
            is_64: _,
            op: _,
            src,
            dst,
        } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(*dst);
        }
        Inst::Imm_R {
            dst_is_64: _,
            simm64: _,
            dst,
        } => {
            collector.add_def(*dst);
        }
        Inst::Mov_R_R { is_64: _, src, dst } => {
            collector.add_use(*src);
            collector.add_def(*dst);
        }
        Inst::MovZX_M_R {
            extMode: _,
            addr,
            dst,
        } => {
            addr.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }
        Inst::Mov64_M_R { addr, dst } => {
            addr.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }
        Inst::MovSX_M_R {
            extMode: _,
            addr,
            dst,
        } => {
            addr.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }
        Inst::Mov_R_M { size: _, src, addr } => {
            collector.add_use(*src);
            addr.get_regs_as_uses(collector);
        }
        Inst::Shift_R {
            is_64: _,
            kind: _,
            num_bits,
            dst,
        } => {
            if num_bits.is_none() {
                collector.add_use(regs::rcx());
            }
            collector.add_mod(*dst);
        }
        Inst::Cmp_RMI_R { size: _, src, dst } => {
            src.get_regs_as_uses(collector);
            collector.add_use(*dst); // yes, really `add_use`
        }
        Inst::Push64 { src } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(Writable::from_reg(regs::rsp()));
        }
        Inst::Pop64 { dst } => {
            collector.add_def(*dst);
        }
        Inst::CallKnown {
            dest: _,
            uses: _,
            defs: _,
        } => {
            // FIXME add arg regs (iru.used) and caller-saved regs (iru.defined)
            unimplemented!();
        }
        Inst::CallUnknown { dest } => {
            dest.get_regs_as_uses(collector);
        }
        Inst::Ret => {}
        Inst::EpiloguePlaceholder => {}
        Inst::JmpKnown { dest: _ } => {}
        Inst::JmpCondSymm {
            cc: _,
            taken: _,
            not_taken: _,
        } => {}
        //
        // ** JmpCond
        //
        // ** JmpCondCompound
        //
        //Inst::JmpUnknown { target } => {
        //    target.get_regs_as_uses(collector);
        //}
        Inst::Nop { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondCompound { .. }
        | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
    }
}

//=============================================================================
// Instructions and subcomponents: map_regs

fn map_use(m: &RegUsageMapper, r: &mut Reg) {
    if r.is_virtual() {
        let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
        *r = new;
    }
}

fn map_def(m: &RegUsageMapper, r: &mut Writable<Reg>) {
    if r.to_reg().is_virtual() {
        let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
        *r = Writable::from_reg(new);
    }
}

fn map_mod(m: &RegUsageMapper, r: &mut Writable<Reg>) {
    if r.to_reg().is_virtual() {
        let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
        *r = Writable::from_reg(new);
    }
}

impl Addr {
    fn map_uses(&mut self, map: &RegUsageMapper) {
        match self {
            Addr::IR {
                simm32: _,
                ref mut base,
            } => map_use(map, base),
            Addr::IRRS {
                simm32: _,
                ref mut base,
                ref mut index,
                shift: _,
            } => {
                map_use(map, base);
                map_use(map, index);
            }
        }
    }
}

impl RMI {
    fn map_uses(&mut self, map: &RegUsageMapper) {
        match self {
            RMI::R { ref mut reg } => map_use(map, reg),
            RMI::M { ref mut addr } => addr.map_uses(map),
            RMI::I { simm32: _ } => {}
        }
    }
}

impl RM {
    fn map_uses(&mut self, map: &RegUsageMapper) {
        match self {
            RM::R { ref mut reg } => map_use(map, reg),
            RM::M { ref mut addr } => addr.map_uses(map),
        }
    }
}

fn x64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
    // Note this must be carefully synchronized with x64_get_regs.
    match inst {
        // ** Nop
        Inst::Alu_RMI_R {
            is_64: _,
            op: _,
            ref mut src,
            ref mut dst,
        } => {
            src.map_uses(mapper);
            map_mod(mapper, dst);
        }
        Inst::Imm_R {
            dst_is_64: _,
            simm64: _,
            ref mut dst,
        } => map_def(mapper, dst),
        Inst::Mov_R_R {
            is_64: _,
            ref mut src,
            ref mut dst,
        } => {
            map_use(mapper, src);
            map_def(mapper, dst);
        }
        Inst::MovZX_M_R {
            extMode: _,
            ref mut addr,
            ref mut dst,
        } => {
            addr.map_uses(mapper);
            map_def(mapper, dst);
        }
        Inst::Mov64_M_R { addr, dst } => {
            addr.map_uses(mapper);
            map_def(mapper, dst);
        }
        Inst::MovSX_M_R {
            extMode: _,
            ref mut addr,
            ref mut dst,
        } => {
            addr.map_uses(mapper);
            map_def(mapper, dst);
        }
        Inst::Mov_R_M {
            size: _,
            ref mut src,
            ref mut addr,
        } => {
            map_use(mapper, src);
            addr.map_uses(mapper);
        }
        Inst::Shift_R {
            is_64: _,
            kind: _,
            num_bits: _,
            ref mut dst,
        } => {
            map_mod(mapper, dst);
        }
        Inst::Cmp_RMI_R {
            size: _,
            ref mut src,
            ref mut dst,
        } => {
            src.map_uses(mapper);
            map_use(mapper, dst);
        }
        Inst::Push64 { ref mut src } => src.map_uses(mapper),
        Inst::Pop64 { ref mut dst } => {
            map_def(mapper, dst);
        }
        Inst::CallKnown {
            dest: _,
            uses: _,
            defs: _,
        } => {}
        Inst::CallUnknown { dest } => dest.map_uses(mapper),
        Inst::Ret => {}
        Inst::EpiloguePlaceholder => {}
        Inst::JmpKnown { dest: _ } => {}
        Inst::JmpCondSymm {
            cc: _,
            taken: _,
            not_taken: _,
        } => {}
        //
        // ** JmpCond
        //
        // ** JmpCondCompound
        //
        //Inst::JmpUnknown { target } => {
        //    target.apply_map(mapper);
        //}
        Inst::Nop { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondCompound { .. }
        | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    fn get_regs(&self, collector: &mut RegUsageCollector) {
        x64_get_regs(&self, collector)
    }

    fn map_regs(&mut self, mapper: &RegUsageMapper) {
        x64_map_regs(self, mapper);
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
        // out the upper 32 bits of the destination. For example, we could
        // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
        // %reg.
        match self {
            Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
            _ => None,
        }
    }

    fn is_epilogue_placeholder(&self) -> bool {
        if let Self::EpiloguePlaceholder = self {
            true
        } else {
            false
        }
    }

    fn is_term<'a>(&'a self) -> MachTerminator<'a> {
        match self {
            // Interesting cases.
            &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
            &Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()),
            &Self::JmpCondSymm {
                cc: _,
                taken,
                not_taken,
            } => MachTerminator::Cond(
                taken.as_block_index().unwrap(),
                not_taken.as_block_index().unwrap(),
            ),
            &Self::JmpCond { .. } | &Self::JmpCondCompound { .. } => {
                panic!("is_term() called after lowering branches");
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, _ty: Type) -> Inst {
        let rc_dst = dst_reg.to_reg().get_class();
        let rc_src = src_reg.get_class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        match rc_dst {
            RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
            _ => panic!("gen_move(x64): unhandled regclass"),
        }
    }

    fn gen_zero_len_nop() -> Inst {
        unimplemented!()
    }

    fn gen_nop(_preferred_size: usize) -> Inst {
        unimplemented!()
    }

    fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
        None
    }

    fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
        match ty {
            I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
            F32 | F64 | I128 | B128 => Ok(RegClass::V128),
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {}",
                ty
            ))),
        }
    }

    fn gen_jump(blockindex: BlockIndex) -> Inst {
        Inst::jmp_known(BranchTarget::Block(blockindex))
    }

    fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) {
        // This is identical (modulo renaming) to the arm64 version.
        match self {
            &mut Inst::JmpKnown { ref mut dest } => {
                dest.map(block_target_map);
            }
            &mut Inst::JmpCondSymm {
                cc: _,
                ref mut taken,
                ref mut not_taken,
            } => {
                taken.map(block_target_map);
                not_taken.map(block_target_map);
            }
            &mut Inst::JmpCond { .. } | &mut Inst::JmpCondCompound { .. } => {
                panic!("with_block_rewrites called after branch lowering!");
            }
            _ => {}
        }
    }

    fn with_fallthrough_block(&mut self, fallthrough: Option<BlockIndex>) {
        // This is identical (modulo renaming) to the arm64 version.
        match self {
            &mut Inst::JmpCondSymm {
                cc,
                taken,
                not_taken,
            } => {
                if taken.as_block_index() == fallthrough {
                    *self = Inst::jmp_cond(cc.invert(), not_taken);
                } else if not_taken.as_block_index() == fallthrough {
                    *self = Inst::jmp_cond(cc, taken);
                } else {
                    // We need a compound sequence (condbr / uncond-br).
                    *self = Inst::jmp_cond_compound(cc, taken, not_taken);
                }
            }
            &mut Inst::JmpKnown { dest } => {
                if dest.as_block_index() == fallthrough {
                    *self = Inst::nop(0);
                }
            }
            _ => {}
        }
    }

    fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) {
        // This is identical (modulo renaming) to the arm64 version.
        match self {
            &mut Self::JmpCond {
                cc: _,
                ref mut target,
            } => {
                target.lower(targets, my_offset);
            }
            &mut Self::JmpCondCompound {
                cc: _,
                ref mut taken,
                ref mut not_taken,
                ..
            } => {
                taken.lower(targets, my_offset);
                not_taken.lower(targets, my_offset);
            }
            &mut Self::JmpKnown { ref mut dest } => {
                dest.lower(targets, my_offset);
            }
            _ => {}
        }
    }

    fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
        create_reg_universe_systemv(flags)
    }
}

impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
    fn emit(&self, sink: &mut O, _flags: &settings::Flags) {
        emit::emit(self, sink);
    }
}
261
cranelift/codegen/src/isa/x64/inst/regs.rs
Normal file
@@ -0,0 +1,261 @@
//! Registers, the Universe thereof, and printing.
//!
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
//! intended to make callee-save registers available before caller-saved ones. This is a net win
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
//! functions, and we should change the ordering in that case, so as to make caller-save regs
//! available first.
//!
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.

use alloc::vec::Vec;
use std::string::String;

use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES};

use crate::machinst::pretty_print::ShowWithRRU;
use crate::settings;

// Hardware encodings for a few registers.

pub const ENC_RBX: u8 = 3;
pub const ENC_RSP: u8 = 4;
pub const ENC_RBP: u8 = 5;
pub const ENC_R12: u8 = 12;
pub const ENC_R13: u8 = 13;
pub const ENC_R14: u8 = 14;
pub const ENC_R15: u8 = 15;

fn gpr(enc: u8, index: u8) -> Reg {
    Reg::new_real(RegClass::I64, enc, index)
}

pub(crate) fn r12() -> Reg {
    gpr(ENC_R12, 0)
}
pub(crate) fn r13() -> Reg {
    gpr(ENC_R13, 1)
}
pub(crate) fn r14() -> Reg {
    gpr(ENC_R14, 2)
}
pub(crate) fn r15() -> Reg {
    gpr(ENC_R15, 3)
}
pub(crate) fn rbx() -> Reg {
    gpr(ENC_RBX, 4)
}
pub(crate) fn rsi() -> Reg {
    gpr(6, 5)
}
pub(crate) fn rdi() -> Reg {
    gpr(7, 6)
}
pub(crate) fn rax() -> Reg {
    gpr(0, 7)
}
pub(crate) fn rcx() -> Reg {
    gpr(1, 8)
}
pub(crate) fn rdx() -> Reg {
    gpr(2, 9)
}
pub(crate) fn r8() -> Reg {
    gpr(8, 10)
}
pub(crate) fn r9() -> Reg {
    gpr(9, 11)
}
pub(crate) fn r10() -> Reg {
    gpr(10, 12)
}
pub(crate) fn r11() -> Reg {
    gpr(11, 13)
}

fn fpr(enc: u8, index: u8) -> Reg {
    Reg::new_real(RegClass::V128, enc, index)
}
fn xmm0() -> Reg {
    fpr(0, 14)
}
fn xmm1() -> Reg {
    fpr(1, 15)
}
fn xmm2() -> Reg {
    fpr(2, 16)
}
fn xmm3() -> Reg {
    fpr(3, 17)
}
fn xmm4() -> Reg {
    fpr(4, 18)
}
fn xmm5() -> Reg {
    fpr(5, 19)
}
fn xmm6() -> Reg {
    fpr(6, 20)
}
fn xmm7() -> Reg {
    fpr(7, 21)
}
fn xmm8() -> Reg {
    fpr(8, 22)
}
fn xmm9() -> Reg {
    fpr(9, 23)
}
fn xmm10() -> Reg {
    fpr(10, 24)
}
fn xmm11() -> Reg {
    fpr(11, 25)
}
fn xmm12() -> Reg {
    fpr(12, 26)
}
fn xmm13() -> Reg {
    fpr(13, 27)
}
fn xmm14() -> Reg {
    fpr(14, 28)
}
fn xmm15() -> Reg {
    fpr(15, 29)
}

pub(crate) fn rsp() -> Reg {
    gpr(ENC_RSP, 30)
}
pub(crate) fn rbp() -> Reg {
    gpr(ENC_RBP, 31)
}

/// Create the register universe for X64.
///
/// The ordering of registers matters, as commented in the file doc comment: assumes the
/// calling-convention is SystemV, at the moment.
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
    let mut regs = Vec::<(RealReg, String)>::new();
    let mut allocable_by_class = [None; NUM_REG_CLASSES];

    // Integer regs.
    let mut base = regs.len();

    // Callee-saved, in the SystemV x86_64 ABI.
    regs.push((r12().to_real_reg(), "%r12".into()));
    regs.push((r13().to_real_reg(), "%r13".into()));
    regs.push((r14().to_real_reg(), "%r14".into()));
    regs.push((r15().to_real_reg(), "%r15".into()));
    regs.push((rbx().to_real_reg(), "%rbx".into()));

    // Caller-saved, in the SystemV x86_64 ABI.
    regs.push((rsi().to_real_reg(), "%rsi".into()));
    regs.push((rdi().to_real_reg(), "%rdi".into()));
    regs.push((rax().to_real_reg(), "%rax".into()));
    regs.push((rcx().to_real_reg(), "%rcx".into()));
    regs.push((rdx().to_real_reg(), "%rdx".into()));
    regs.push((r8().to_real_reg(), "%r8".into()));
    regs.push((r9().to_real_reg(), "%r9".into()));
    regs.push((r10().to_real_reg(), "%r10".into()));
    regs.push((r11().to_real_reg(), "%r11".into()));

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(r12().get_index()),
    });

    // XMM registers
    base = regs.len();
    regs.push((xmm0().to_real_reg(), "%xmm0".into()));
    regs.push((xmm1().to_real_reg(), "%xmm1".into()));
    regs.push((xmm2().to_real_reg(), "%xmm2".into()));
    regs.push((xmm3().to_real_reg(), "%xmm3".into()));
    regs.push((xmm4().to_real_reg(), "%xmm4".into()));
    regs.push((xmm5().to_real_reg(), "%xmm5".into()));
    regs.push((xmm6().to_real_reg(), "%xmm6".into()));
    regs.push((xmm7().to_real_reg(), "%xmm7".into()));
    regs.push((xmm8().to_real_reg(), "%xmm8".into()));
    regs.push((xmm9().to_real_reg(), "%xmm9".into()));
    regs.push((xmm10().to_real_reg(), "%xmm10".into()));
    regs.push((xmm11().to_real_reg(), "%xmm11".into()));
    regs.push((xmm12().to_real_reg(), "%xmm12".into()));
    regs.push((xmm13().to_real_reg(), "%xmm13".into()));
    regs.push((xmm14().to_real_reg(), "%xmm14".into()));
    regs.push((xmm15().to_real_reg(), "%xmm15".into()));

    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(xmm15().get_index()),
    });

    // Other regs, not available to the allocator.
    let allocable = regs.len();
    regs.push((rsp().to_real_reg(), "%rsp".into()));
    regs.push((rbp().to_real_reg(), "%rbp".into()));

    RealRegUniverse {
        regs,
        allocable,
        allocable_by_class,
    }
}
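// An illustrative sanity check (added for exposition; not part of the original
// commit): %rsp and %rbp are appended after the `allocable` cutoff, so the
// allocator can never hand them out.
#[cfg(test)]
#[test]
fn universe_reserves_rsp_and_rbp() {
    let flags = settings::Flags::new(settings::builder());
    let universe = create_reg_universe_systemv(&flags);
    // Exactly two registers (rsp, rbp) live past the allocable region.
    assert_eq!(universe.regs.len(), universe.allocable + 2);
}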

/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
/// smaller size (4, 2 or 1 bytes).
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
    let mut s = reg.show_rru(mb_rru);

    if reg.get_class() != RegClass::I64 || size == 8 {
        // We can't do any better.
        return s;
    }

    if reg.is_real() {
        // Change (eg) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
        // describe diplomatically as "a kludge", but it's only debug code.
        let remapper = match s.as_str() {
            "%rax" => Some(["%eax", "%ax", "%al"]),
            "%rbx" => Some(["%ebx", "%bx", "%bl"]),
            "%rcx" => Some(["%ecx", "%cx", "%cl"]),
            "%rdx" => Some(["%edx", "%dx", "%dl"]),
            "%rsi" => Some(["%esi", "%si", "%sil"]),
            "%rdi" => Some(["%edi", "%di", "%dil"]),
            "%rbp" => Some(["%ebp", "%bp", "%bpl"]),
            "%rsp" => Some(["%esp", "%sp", "%spl"]),
            "%r8" => Some(["%r8d", "%r8w", "%r8b"]),
            "%r9" => Some(["%r9d", "%r9w", "%r9b"]),
            "%r10" => Some(["%r10d", "%r10w", "%r10b"]),
            "%r11" => Some(["%r11d", "%r11w", "%r11b"]),
            "%r12" => Some(["%r12d", "%r12w", "%r12b"]),
            "%r13" => Some(["%r13d", "%r13w", "%r13b"]),
            "%r14" => Some(["%r14d", "%r14w", "%r14b"]),
            "%r15" => Some(["%r15d", "%r15w", "%r15b"]),
            _ => None,
        };
        if let Some(smaller_names) = remapper {
            match size {
                4 => s = smaller_names[0].into(),
                2 => s = smaller_names[1].into(),
                1 => s = smaller_names[2].into(),
                _ => panic!("show_ireg_sized: real"),
            }
        }
    } else {
        // Add a "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
        let suffix = match size {
            4 => "l",
            2 => "w",
            1 => "b",
            _ => panic!("show_ireg_sized: virtual"),
        };
        s = s + suffix;
    }

    s
}
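// A small illustration of the mapping (added for exposition; not part of the
// original commit). The universe is needed so the real register prints under
// its registered name before remapping.
#[cfg(test)]
#[test]
fn show_ireg_sized_examples() {
    let flags = settings::Flags::new(settings::builder());
    let universe = create_reg_universe_systemv(&flags);
    // %rax shown at sizes 4 and 1 uses the narrower architectural names.
    assert_eq!(show_ireg_sized(rax(), Some(&universe), 4), "%eax");
    assert_eq!(show_ireg_sized(rax(), Some(&universe), 1), "%al");
}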
358
cranelift/codegen/src/isa/x64/lower.rs
Normal file
@@ -0,0 +1,358 @@
//! Lowering rules for X64.

#![allow(dead_code)]
#![allow(non_snake_case)]

use regalloc::{Reg, Writable};

use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, Type};

use crate::machinst::lower::*;
use crate::machinst::*;

use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::X64Backend;

/// Context passed to all lowering functions.
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;

//=============================================================================
// Helpers for instruction lowering.

fn is_int_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

fn int_ty_to_is64(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 => false,
        types::I64 => true,
        _ => panic!("type {} is none of I8, I16, I32 or I64", ty),
    }
}

fn int_ty_to_sizeB(ty: Type) -> u8 {
    match ty {
        types::I8 => 1,
        types::I16 => 2,
        types::I32 => 4,
        types::I64 => 8,
        _ => panic!("ity_to_sizeB"),
    }
}

fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
    let inst_data = ctx.data(iri);
    if inst_data.opcode() == Opcode::Null {
        Some(0)
    } else {
        match inst_data {
            &InstructionData::UnaryImm { opcode: _, imm } => {
                // Only has Into for i64; we use u64 elsewhere, so we cast.
                let imm: i64 = imm.into();
                Some(imm as u64)
            }
            _ => None,
        }
    }
}

fn inst_condcode(data: &InstructionData) -> IntCC {
    match data {
        &InstructionData::IntCond { cond, .. }
        | &InstructionData::BranchIcmp { cond, .. }
        | &InstructionData::IntCompare { cond, .. }
        | &InstructionData::IntCondTrap { cond, .. }
        | &InstructionData::BranchInt { cond, .. }
        | &InstructionData::IntSelect { cond, .. }
        | &InstructionData::IntCompareImm { cond, .. } => cond,
        _ => panic!("inst_condcode(x64): unhandled: {:?}", data),
    }
}

fn intCC_to_x64_CC(cc: IntCC) -> CC {
    match cc {
        IntCC::Equal => CC::Z,
        IntCC::NotEqual => CC::NZ,
        IntCC::SignedGreaterThanOrEqual => CC::NL,
        IntCC::SignedGreaterThan => CC::NLE,
        IntCC::SignedLessThanOrEqual => CC::LE,
        IntCC::SignedLessThan => CC::L,
        IntCC::UnsignedGreaterThanOrEqual => CC::NB,
        IntCC::UnsignedGreaterThan => CC::NBE,
        IntCC::UnsignedLessThanOrEqual => CC::BE,
        IntCC::UnsignedLessThan => CC::B,
        IntCC::Overflow => CC::O,
        IntCC::NotOverflow => CC::NO,
    }
}
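// A note added for illustration: the signed orderings map to the x86
// conditions that test SF and OF (L is SF != OF, etc.), while the unsigned
// orderings map to conditions that test CF (B is CF == 1; BE also folds in
// ZF). This mirrors the standard Jcc naming: "less" for signed, "below" for
// unsigned.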

//=============================================================================
// Top-level instruction lowering entry point, for one instruction.

/// Actually codegen an instruction's results into registers.
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
    let op = ctx.data(iri).opcode();
    let ty = if ctx.num_outputs(iri) == 1 {
        Some(ctx.output_ty(iri, 0))
    } else {
        None
    };

    // This is all outstandingly feeble. TODO: much better!

    match op {
        Opcode::Iconst => {
            if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
                // Get exactly the bit pattern in 'w64' into the dest. No
                // monkeying with sign extension etc.
                let dstIs64 = w64 > 0xFFFF_FFFF;
                let regD = ctx.output(iri, 0);
                ctx.emit(Inst::imm_r(dstIs64, w64, regD));
            } else {
                unimplemented!();
            }
        }

        Opcode::Iadd | Opcode::Isub => {
            let regD = ctx.output(iri, 0);
            let regL = ctx.input(iri, 0);
            let regR = ctx.input(iri, 1);
            let is64 = int_ty_to_is64(ty.unwrap());
            let how = if op == Opcode::Iadd {
                RMI_R_Op::Add
            } else {
                RMI_R_Op::Sub
            };
            ctx.emit(Inst::mov_r_r(true, regL, regD));
            ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD));
        }
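        // Illustrative example (added for exposition, not from the original
        // source): for `v2 = iadd.i32 v0, v1`, the two instructions emitted
        // above are, in AT&T syntax:
        //
        //     movq %v0, %v2
        //     addl %v1, %v2
        //
        // The full-width move keeps the pattern uniform; the 32-bit add then
        // writes (and zero-extends) the low 32 bits of %v2.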

        Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
            // TODO: implement imm shift value into insn
            let tySL = ctx.input_ty(iri, 0);
            let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
            let regSL = ctx.input(iri, 0);
            let regSR = ctx.input(iri, 1);
            let regD = ctx.output(iri, 0);
            if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
                let how = match op {
                    Opcode::Ishl => ShiftKind::Left,
                    Opcode::Ushr => ShiftKind::RightZ,
                    Opcode::Sshr => ShiftKind::RightS,
                    _ => unreachable!(),
                };
                let is64 = tyD == types::I64;
                let r_rcx = regs::rcx();
                let w_rcx = Writable::<Reg>::from_reg(r_rcx);
                ctx.emit(Inst::mov_r_r(true, regSL, regD));
                ctx.emit(Inst::mov_r_r(true, regSR, w_rcx));
                ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD));
            } else {
                unimplemented!()
            }
        }

        Opcode::Uextend | Opcode::Sextend => {
            // TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
            // don't accept a register source operand. They should be changed
            // so as to have _RM_R form.
            // TODO2: if the source operand is a load, incorporate that.
            let isZX = op == Opcode::Uextend;
            let tyS = ctx.input_ty(iri, 0);
            let tyD = ctx.output_ty(iri, 0);
            let regS = ctx.input(iri, 0);
            let regD = ctx.output(iri, 0);
            ctx.emit(Inst::mov_r_r(true, regS, regD));
            match (tyS, tyD, isZX) {
                (types::I8, types::I64, false) => {
                    ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD));
                    ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD));
                }
                _ => unimplemented!(),
            }
        }
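        // Illustrative example (added for exposition, not from the original
        // source): an `sextend.i64` of an I8 value in %v1 becomes, in AT&T
        // syntax:
        //
        //     movq %v1, %vD
        //     shlq $56, %vD
        //     sarq $56, %vD
        //
        // The shift pair moves the byte's sign bit up into bit 63 and then
        // arithmetic-shifts it back down, replicating it across the upper
        // 56 bits.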

        Opcode::FallthroughReturn | Opcode::Return => {
            for i in 0..ctx.num_inputs(iri) {
                let src_reg = ctx.input(iri, i);
                let retval_reg = ctx.retval(i);
                ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
            }
            // N.B.: the Ret itself is generated by the ABI.
        }

        Opcode::IaddImm
        | Opcode::ImulImm
        | Opcode::UdivImm
        | Opcode::SdivImm
        | Opcode::UremImm
        | Opcode::SremImm
        | Opcode::IrsubImm
        | Opcode::IaddCin
        | Opcode::IaddIfcin
        | Opcode::IaddCout
        | Opcode::IaddIfcout
        | Opcode::IaddCarry
        | Opcode::IaddIfcarry
        | Opcode::IsubBin
        | Opcode::IsubIfbin
        | Opcode::IsubBout
        | Opcode::IsubIfbout
        | Opcode::IsubBorrow
        | Opcode::IsubIfborrow
        | Opcode::BandImm
        | Opcode::BorImm
        | Opcode::BxorImm
        | Opcode::RotlImm
        | Opcode::RotrImm
        | Opcode::IshlImm
        | Opcode::UshrImm
        | Opcode::SshrImm => {
            panic!("ALU+imm and ALU+carry ops should not appear here!");
        }

        Opcode::X86Udivmodx
        | Opcode::X86Sdivmodx
        | Opcode::X86Umulx
        | Opcode::X86Smulx
        | Opcode::X86Cvtt2si
        | Opcode::X86Fmin
        | Opcode::X86Fmax
        | Opcode::X86Push
        | Opcode::X86Pop
        | Opcode::X86Bsr
        | Opcode::X86Bsf
        | Opcode::X86Pshufd
        | Opcode::X86Pshufb
        | Opcode::X86Pextr
        | Opcode::X86Pinsr
        | Opcode::X86Insertps
        | Opcode::X86Movsd
        | Opcode::X86Movlhps
        | Opcode::X86Psll
        | Opcode::X86Psrl
        | Opcode::X86Psra
        | Opcode::X86Ptest
        | Opcode::X86Pmaxs
        | Opcode::X86Pmaxu
        | Opcode::X86Pmins
        | Opcode::X86Pminu => {
            panic!("x86-specific opcode in supposedly arch-neutral IR!");
        }

        _ => unimplemented!("unimplemented lowering for opcode {:?}", op),
    }
}

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for X64Backend {
    type MInst = Inst;

    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
        lower_insn_to_regs(ctx, ir_inst);
    }

    fn lower_branch_group<C: LowerCtx<I = Inst>>(
        &self,
        ctx: &mut C,
        branches: &[IRInst],
        targets: &[BlockIndex],
        fallthrough: Option<BlockIndex>,
    ) {
        // A block should end with at most two branches. The first may be a
        // conditional branch; a conditional branch can be followed only by an
        // unconditional branch or fallthrough. Otherwise, if only one branch,
        // it may be an unconditional branch, a fallthrough, a return, or a
        // trap. These conditions are verified by `is_ebb_basic()` during the
        // verifier pass.
        assert!(branches.len() <= 2);

        let mut unimplemented = false;

        if branches.len() == 2 {
            // Must be a conditional branch followed by an unconditional branch.
            let op0 = ctx.data(branches[0]).opcode();
            let op1 = ctx.data(branches[1]).opcode();

            println!(
                "QQQQ lowering two-branch group: opcodes are {:?} and {:?}",
                op0, op1
            );

            assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
            let taken = BranchTarget::Block(targets[0]);
            let not_taken = match op1 {
                Opcode::Jump => BranchTarget::Block(targets[1]),
                Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
                _ => unreachable!(), // assert above.
            };
            match op0 {
                Opcode::Brz | Opcode::Brnz => {
                    let tyS = ctx.input_ty(branches[0], 0);
                    if is_int_ty(tyS) {
                        let rS = ctx.input(branches[0], 0);
                        let cc = match op0 {
                            Opcode::Brz => CC::Z,
                            Opcode::Brnz => CC::NZ,
                            _ => unreachable!(),
                        };
                        let sizeB = int_ty_to_sizeB(tyS);
                        ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS));
                        ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
                    } else {
                        unimplemented = true;
                    }
                }
                Opcode::BrIcmp => {
                    let tyS = ctx.input_ty(branches[0], 0);
                    if is_int_ty(tyS) {
                        let rSL = ctx.input(branches[0], 0);
                        let rSR = ctx.input(branches[0], 1);
                        let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
                        let sizeB = int_ty_to_sizeB(tyS);
                        // FIXME verify rSR vs rSL ordering
                        ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL));
                        ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
                    } else {
                        unimplemented = true;
                    }
                }
                // TODO: Brif/icmp, Brff/icmp, jump tables
                _ => {
                    unimplemented = true;
                }
            }
        } else {
            assert!(branches.len() == 1);

            // Must be an unconditional branch or trap.
            let op = ctx.data(branches[0]).opcode();
            match op {
                Opcode::Jump => {
                    ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
                }
                Opcode::Fallthrough => {
                    ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
                }
                Opcode::Trap => {
                    unimplemented = true;
                }
                _ => panic!("Unknown branch type!"),
            }
        }

        if unimplemented {
            unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
        }
    }
}
92
cranelift/codegen/src/isa/x64/mod.rs
Normal file
@@ -0,0 +1,92 @@
//! x86_64 Instruction Set Architecture.

use alloc::boxed::Box;

use regalloc::RealRegUniverse;
use target_lexicon::Triple;

use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::pretty_print::ShowWithRRU;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self, Flags};

use crate::isa::x64::inst::regs::create_reg_universe_systemv;

mod abi;
mod inst;
mod lower;

/// An X64 backend.
pub(crate) struct X64Backend {
    triple: Triple,
    flags: Flags,
}

impl X64Backend {
    /// Create a new X64 backend with the given (shared) flags.
    fn new_with_flags(triple: Triple, flags: Flags) -> Self {
        Self { triple, flags }
    }

    fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
        // This performs lowering to VCode, register-allocates the code, computes
        // block layout and finalizes branches. The result is ready for binary emission.
        let abi = Box::new(abi::X64ABIBody::new(&func, flags));
        compile::compile::<Self>(&func, self, abi)
    }
}

impl MachBackend for X64Backend {
    fn compile_function(
        &self,
        func: &Function,
        want_disasm: bool,
    ) -> CodegenResult<MachCompileResult> {
        let flags = self.flags();
        let vcode = self.compile_vcode(func, flags.clone())?;
        let sections = vcode.emit();
        let frame_size = vcode.frame_size();

        let disasm = if want_disasm {
            Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
        } else {
            None
        };

        Ok(MachCompileResult {
            sections,
            frame_size,
            disasm,
        })
    }

    fn flags(&self) -> &Flags {
        &self.flags
    }

    fn name(&self) -> &'static str {
        "x64"
    }

    fn triple(&self) -> Triple {
        self.triple.clone()
    }

    fn reg_universe(&self) -> RealRegUniverse {
        create_reg_universe_systemv(&self.flags)
    }
}

/// Create a new `isa::Builder`.
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
    IsaBuilder {
        triple,
        setup: settings::builder(),
        constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
            let backend = X64Backend::new_with_flags(triple, flags);
            Box::new(TargetIsaAdapter::new(backend))
        },
    }
}
@@ -53,12 +53,23 @@ fn isa_constructor(
         PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
         PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
     };
-    Box::new(Isa {
-        triple,
-        isa_flags: settings::Flags::new(&shared_flags, builder),
-        shared_flags,
-        cpumode: level1,
-    })
+
+    let isa_flags = settings::Flags::new(&shared_flags, builder);
+
+    if isa_flags.use_new_backend() {
+        #[cfg(not(feature = "x64"))]
+        panic!("new backend x86 support not included by cargo features!");
+
+        #[cfg(feature = "x64")]
+        super::x64::isa_builder(triple).finish(shared_flags)
+    } else {
+        Box::new(Isa {
+            triple,
+            isa_flags,
+            shared_flags,
+            cpumode: level1,
+        })
+    }
 }
 
 impl TargetIsa for Isa {
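For context, a minimal sketch of how a client might opt in to the new backend via this flag, assuming the `x64` cargo feature is enabled; the target-triple string and the unwrap-based error handling here are illustrative and not part of this commit:

    use cranelift_codegen::isa;
    use cranelift_codegen::settings::{self, Configurable};

    // Build shared flags, then an x86_64 ISA builder with the new setting on.
    let shared = settings::Flags::new(settings::builder());
    let mut isa_builder = isa::lookup_by_name("x86_64-unknown-linux-gnu").unwrap();
    isa_builder.set("use_new_backend", "true").unwrap();
    let isa = isa_builder.finish(shared);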