Update x64 backend to use new lowering APIs (MachLabel-based branch targets and MachBuffer-based emission).

Chris Fallin
2020-05-17 16:22:35 -07:00
parent 72e6be9342
commit 687aca00fe
8 changed files with 205 additions and 303 deletions

View File

@@ -77,8 +77,8 @@ mod riscv;
#[cfg(feature = "x86")] #[cfg(feature = "x86")]
mod x86; mod x86;
//#[cfg(feature = "x64")] #[cfg(feature = "x64")]
//mod x64; mod x64;
#[cfg(feature = "arm32")] #[cfg(feature = "arm32")]
mod arm32; mod arm32;

View File

@@ -5,7 +5,6 @@ use std::string::{String, ToString};
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector}; use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
use crate::binemit::CodeOffset;
use crate::machinst::*; use crate::machinst::*;
use super::regs::show_ireg_sized; use super::regs::show_ireg_sized;
@@ -375,43 +374,27 @@ impl fmt::Debug for CC {
/// from end of current instruction). /// from end of current instruction).
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub enum BranchTarget { pub enum BranchTarget {
/// An unresolved reference to a BlockIndex, as passed into /// An unresolved reference to a MachLabel.
/// `lower_branch_group()`. Label(MachLabel),
Block(BlockIndex),
/// A resolved reference to another instruction, after /// A resolved reference to another instruction, in bytes.
/// `Inst::with_block_offsets()`. This offset is in bytes. ResolvedOffset(isize),
ResolvedOffset(BlockIndex, isize),
} }
impl ShowWithRRU for BranchTarget { impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self { match self {
BranchTarget::Block(bix) => format!("(Block {})", bix), BranchTarget::Label(l) => format!("{:?}", l),
BranchTarget::ResolvedOffset(bix, offs) => format!("(Block {}, offset {})", bix, offs), BranchTarget::ResolvedOffset(offs) => format!("(offset {})", offs),
} }
} }
} }
impl BranchTarget { impl BranchTarget {
/// Lower the branch target given offsets of each block. /// Get the label.
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) { pub fn as_label(&self) -> Option<MachLabel> {
match self { match self {
&mut BranchTarget::Block(bix) => { &BranchTarget::Label(l) => Some(l),
let bix = bix as usize;
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
*self = BranchTarget::ResolvedOffset(bix as BlockIndex, branch_offset);
}
&mut BranchTarget::ResolvedOffset(..) => {}
}
}
/// Get the block index.
pub fn as_block_index(&self) -> Option<BlockIndex> {
match self {
&BranchTarget::Block(bix) => Some(bix),
_ => None, _ => None,
} }
} }
@@ -421,31 +404,17 @@ impl BranchTarget {
/// byte of the target. It does not take into account the Intel-specific /// byte of the target. It does not take into account the Intel-specific
/// rule that a branch offset is encoded as relative to the start of the /// rule that a branch offset is encoded as relative to the start of the
/// following instruction. That is a problem for the emitter to deal /// following instruction. That is a problem for the emitter to deal
/// with. /// with. If a label, returns zero.
pub fn as_offset_i32(&self) -> Option<i32> { pub fn as_offset32_or_zero(&self) -> i32 {
match self { match self {
&BranchTarget::ResolvedOffset(_, off) => { &BranchTarget::ResolvedOffset(off) => {
// Leave a bit of slack so that the emitter is guaranteed to // Leave a bit of slack so that the emitter is guaranteed to
// be able to add the length of the jump instruction encoding // be able to add the length of the jump instruction encoding
// to this value and still have a value in signed-32 range. // to this value and still have a value in signed-32 range.
if off >= -0x7FFF_FF00isize && off <= 0x7FFF_FF00isize { assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
Some(off as i32) off as i32
} else {
None
} }
} _ => 0,
_ => None,
}
}
/// Map the block index given a transform map.
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[*bix as usize];
*bix = n;
}
_ => panic!("BranchTarget::map() called on already-lowered BranchTarget!"),
} }
} }
} }
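
For context on the args.rs change above: a branch target now starts life as an unresolved `MachLabel` and only becomes a concrete byte offset once the MachBuffer lays out the code, which is why the old `lower()`, `as_block_index()`, and `map()` block-index plumbing can go away. The following self-contained sketch (stand-in types; `Label` and `Target` are illustrative names, not the crate's) shows how the two remaining accessors are meant to be used: an unresolved target yields a zero placeholder offset plus a label for a later fixup, while a resolved one yields the real offset and no label.

    // Stand-in types mirroring the BranchTarget accessors above.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct Label(u32); // stand-in for MachLabel

    #[derive(Clone, Copy, Debug)]
    enum Target {
        /// Unresolved: the buffer patches this once the label is bound.
        Label(Label),
        /// Resolved: byte offset from the start of the branch instruction.
        ResolvedOffset(isize),
    }

    impl Target {
        fn as_label(&self) -> Option<Label> {
            match self {
                Target::Label(l) => Some(*l),
                _ => None,
            }
        }

        /// Real offset if resolved; a zero placeholder if still a label.
        fn as_offset32_or_zero(&self) -> i32 {
            match self {
                Target::ResolvedOffset(off) => {
                    // Same slack as the real code: leave room for the
                    // emitter to add the branch instruction's length.
                    assert!(*off >= -0x7FFF_FF00 && *off <= 0x7FFF_FF00);
                    *off as i32
                }
                _ => 0,
            }
        }
    }

    fn main() {
        let unresolved = Target::Label(Label(3));
        assert_eq!(unresolved.as_offset32_or_zero(), 0); // placeholder
        assert_eq!(unresolved.as_label(), Some(Label(3))); // needs a fixup

        let resolved = Target::ResolvedOffset(128);
        assert_eq!(resolved.as_offset32_or_zero(), 128);
        assert_eq!(resolved.as_label(), None);
    }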

View File

@@ -80,8 +80,8 @@ const F_PREFIX_66: u32 = 4;
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the /// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
/// REX prefix will normally never be redundant, since REX.W must be 1 to /// REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation. /// indicate a 64-bit operation.
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE<O: MachSectionOutput>( fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink: &mut O, sink: &mut MachBuffer<Inst>,
opcodes: u32, opcodes: u32,
mut numOpcodes: usize, mut numOpcodes: usize,
encG: u8, encG: u8,
@@ -199,8 +199,8 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE<O: MachSectionOutput>(
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case /// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
/// where the E operand is a register rather than memory. Hence it is much /// where the E operand is a register rather than memory. Hence it is much
/// simpler. /// simpler.
fn emit_REX_OPCODES_MODRM_encG_encE<O: MachSectionOutput>( fn emit_REX_OPCODES_MODRM_encG_encE(
sink: &mut O, sink: &mut MachBuffer<Inst>,
opcodes: u32, opcodes: u32,
mut numOpcodes: usize, mut numOpcodes: usize,
encG: u8, encG: u8,
@@ -240,8 +240,8 @@ fn emit_REX_OPCODES_MODRM_encG_encE<O: MachSectionOutput>(
// These are merely wrappers for the above two functions that facilitate passing // These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings. // actual `Reg`s rather than their encodings.
fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE<O: MachSectionOutput>( fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink: &mut O, sink: &mut MachBuffer<Inst>,
opcodes: u32, opcodes: u32,
numOpcodes: usize, numOpcodes: usize,
regG: Reg, regG: Reg,
@@ -253,8 +253,8 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE<O: MachSectionOutput>(
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags); emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
} }
fn emit_REX_OPCODES_MODRM_regG_regE<O: MachSectionOutput>( fn emit_REX_OPCODES_MODRM_regG_regE(
sink: &mut O, sink: &mut MachBuffer<Inst>,
opcodes: u32, opcodes: u32,
numOpcodes: usize, numOpcodes: usize,
regG: Reg, regG: Reg,
@@ -268,7 +268,7 @@ fn emit_REX_OPCODES_MODRM_regG_regE<O: MachSectionOutput>(
} }
/// Write a suitable number of bits from an imm64 to the sink. /// Write a suitable number of bits from an imm64 to the sink.
fn emit_simm<O: MachSectionOutput>(sink: &mut O, size: u8, simm32: u32) { fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
match size { match size {
8 | 4 => sink.put4(simm32), 8 | 4 => sink.put4(simm32),
2 => sink.put2(simm32 as u16), 2 => sink.put2(simm32 as u16),
@@ -329,7 +329,7 @@ fn emit_simm<O: MachSectionOutput>(sink: &mut O, size: u8, simm32: u32) {
/// ///
/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we /// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
/// care?) /// care?)
pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) { pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
match inst { match inst {
Inst::Nop { len: 0 } => {} Inst::Nop { len: 0 } => {}
Inst::Alu_RMI_R { Inst::Alu_RMI_R {
@@ -808,55 +808,59 @@ pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) {
} }
Inst::Ret {} => sink.put1(0xC3), Inst::Ret {} => sink.put1(0xC3),
Inst::JmpKnown { Inst::JmpKnown { dest } => {
dest: BranchTarget::Block(..), let disp = dest.as_offset32_or_zero() - 5;
} => { let disp = disp as u32;
// Computation of block offsets/sizes. let br_start = sink.cur_offset();
sink.put1(0);
sink.put4(0);
}
Inst::JmpKnown {
dest: BranchTarget::ResolvedOffset(_bix, offset),
} if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
// And now for real
let mut offs_i32 = *offset as i32;
offs_i32 -= 5;
let offs_u32 = offs_i32 as u32;
sink.put1(0xE9); sink.put1(0xE9);
sink.put4(offs_u32); let br_disp_off = sink.cur_offset();
sink.put4(disp);
let br_end = sink.cur_offset();
if let Some(l) = dest.as_label() {
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(br_start, br_end, l);
} }
//
// ** Inst::JmpCondSymm XXXX should never happen
//
Inst::JmpCond {
cc: _,
target: BranchTarget::Block(..),
} => {
// This case occurs when we are computing block offsets / sizes,
// prior to lowering block-index targets to concrete-offset targets.
// Only the size matters, so let's emit 6 bytes, as below.
sink.put1(0);
sink.put1(0);
sink.put4(0);
} }
Inst::JmpCond { Inst::JmpCondSymm {
cc, cc,
target: BranchTarget::ResolvedOffset(_bix, offset), taken,
} if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => { not_taken,
} => {
// Conditional part.
// This insn is 6 bytes long. Currently `offset` is relative to // This insn is 6 bytes long. Currently `offset` is relative to
// the start of this insn, but the Intel encoding requires it to // the start of this insn, but the Intel encoding requires it to
// be relative to the start of the next instruction. Hence the // be relative to the start of the next instruction. Hence the
// adjustment. // adjustment.
let mut offs_i32 = *offset as i32; let taken_disp = taken.as_offset32_or_zero() - 6;
offs_i32 -= 6; let taken_disp = taken_disp as u32;
let offs_u32 = offs_i32 as u32; let cond_start = sink.cur_offset();
sink.put1(0x0F); sink.put1(0x0F);
sink.put1(0x80 + cc.get_enc()); sink.put1(0x80 + cc.get_enc());
sink.put4(offs_u32); let cond_disp_off = sink.cur_offset();
sink.put4(taken_disp);
let cond_end = sink.cur_offset();
if let Some(l) = taken.as_label() {
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
let inverted: [u8; 6] =
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
}
// Unconditional part.
let nt_disp = not_taken.as_offset32_or_zero() - 5;
let nt_disp = nt_disp as u32;
let uncond_start = sink.cur_offset();
sink.put1(0xE9);
let uncond_disp_off = sink.cur_offset();
sink.put4(nt_disp);
let uncond_end = sink.cur_offset();
if let Some(l) = not_taken.as_label() {
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(uncond_start, uncond_end, l);
}
} }
//
// ** Inst::JmpCondCompound XXXX should never happen
//
Inst::JmpUnknown { target } => { Inst::JmpUnknown { target } => {
match target { match target {
RM::R { reg } => { RM::R { reg } => {
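
The emitter changes above follow one pattern for every branch: write the instruction with whatever displacement is currently known (zero for an unresolved label), remember the buffer offset of the rel32 field with `use_label_at_offset`, and record the branch span with `add_uncond_branch`/`add_cond_branch` so the MachBuffer can patch the displacement later and, where possible, drop or invert branches during layout. The toy buffer below (illustrative `Buf`, `emit_jmp_to_label`, and `bind_and_patch` names, not the real MachBuffer API) demonstrates just the placeholder-plus-fixup part of that pattern for a 5-byte `jmp rel32`.

    // Toy sketch of the fixup pattern: emit a `jmp rel32` with a zero
    // displacement, remember where the displacement lives, then patch it
    // once the target's offset is known.
    struct Buf {
        data: Vec<u8>,
        // (offset of the rel32 field, label id)
        fixups: Vec<(usize, u32)>,
    }

    impl Buf {
        fn new() -> Self {
            Buf { data: Vec::new(), fixups: Vec::new() }
        }

        fn cur_offset(&self) -> usize {
            self.data.len()
        }

        fn emit_jmp_to_label(&mut self, label: u32) {
            self.data.push(0xE9); // jmp rel32 opcode
            let disp_off = self.cur_offset();
            self.data.extend_from_slice(&0i32.to_le_bytes()); // placeholder
            self.fixups.push((disp_off, label)); // like use_label_at_offset
        }

        fn bind_and_patch(&mut self, label: u32, label_offset: usize) {
            for &(disp_off, l) in &self.fixups {
                if l == label {
                    // rel32 is relative to the end of the displacement field.
                    let rel = label_offset as i32 - (disp_off as i32 + 4);
                    self.data[disp_off..disp_off + 4]
                        .copy_from_slice(&rel.to_le_bytes());
                }
            }
        }
    }

    fn main() {
        let mut buf = Buf::new();
        buf.emit_jmp_to_label(0);      // forward jump to label 0
        buf.data.push(0x90);           // some other instruction (nop)
        let target = buf.cur_offset(); // label 0 bound here
        buf.bind_and_patch(0, target);
        assert_eq!(&buf.data[1..5], &1i32.to_le_bytes()[..]); // skips the nop
    }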

View File

@@ -2180,19 +2180,11 @@ fn test_x64_emit() {
let actual_printing = insn.show_rru(Some(&rru)); let actual_printing = insn.show_rru(Some(&rru));
assert_eq!(expected_printing, actual_printing); assert_eq!(expected_printing, actual_printing);
// Check the encoding is as expected.
let text_size = {
let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags, &mut Default::default());
code_sec.size()
};
let mut sink = test_utils::TestCodeSink::new(); let mut sink = test_utils::TestCodeSink::new();
let mut sections = MachSections::new(); let mut buffer = MachBuffer::new();
let code_idx = sections.add_section(0, text_size); insn.emit(&mut buffer, &flags, &mut Default::default());
let code_sec = sections.get_section(code_idx); let buffer = buffer.finish();
insn.emit(code_sec, &flags, &mut Default::default()); buffer.emit(&mut sink);
sections.emit(&mut sink);
let actual_encoding = &sink.stringify(); let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding); assert_eq!(expected_encoding, actual_encoding);
} }
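
The test-harness change above removes the separate sizing pass: a MachBuffer grows on demand, so each instruction is emitted once, the buffer is sealed with `finish()`, and the bytes are copied into the test sink for comparison against the golden string. A minimal stand-alone version of that golden-encoding check (toy `emit_ret` and `hex_str` helpers, not the crate's) looks like this:

    // Emit one instruction into a growable buffer and compare its hex
    // encoding against an expected string, as the test loop above does.
    fn emit_ret(buf: &mut Vec<u8>) {
        buf.push(0xC3); // ret
    }

    fn hex_str(bytes: &[u8]) -> String {
        bytes.iter().map(|b| format!("{:02X}", b)).collect()
    }

    fn main() {
        let expected_encoding = "C3";
        let mut buffer = Vec::new();            // stands in for MachBuffer::new()
        emit_ret(&mut buffer);                  // insn.emit(&mut buffer, ...)
        let actual_encoding = hex_str(&buffer); // finish() + emit into the sink
        assert_eq!(expected_encoding, actual_encoding);
    }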

View File

@@ -4,6 +4,8 @@
#![allow(non_snake_case)] #![allow(non_snake_case)]
#![allow(non_camel_case_types)] #![allow(non_camel_case_types)]
use core::convert::TryFrom;
use smallvec::SmallVec;
use std::fmt; use std::fmt;
use std::string::{String, ToString}; use std::string::{String, ToString};
@@ -16,6 +18,7 @@ use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I6
use crate::ir::ExternalName; use crate::ir::ExternalName;
use crate::ir::Type; use crate::ir::Type;
use crate::machinst::*; use crate::machinst::*;
use crate::settings::Flags;
use crate::{settings, CodegenError, CodegenResult}; use crate::{settings, CodegenError, CodegenResult};
pub mod args; pub mod args;
@@ -25,7 +28,7 @@ mod emit_tests;
pub mod regs; pub mod regs;
use args::*; use args::*;
use regs::show_ireg_sized; use regs::{create_reg_universe_systemv, show_ireg_sized};
//============================================================================= //=============================================================================
// Instructions (top level): definition // Instructions (top level): definition
@@ -136,34 +139,15 @@ pub(crate) enum Inst {
JmpKnown { dest: BranchTarget }, JmpKnown { dest: BranchTarget },
/// jcond cond target target /// jcond cond target target
// Symmetrical two-way conditional branch. /// Symmetrical two-way conditional branch.
// Should never reach the emitter. /// Emitted as a compound sequence; the MachBuffer will shrink it
/// as appropriate.
JmpCondSymm { JmpCondSymm {
cc: CC, cc: CC,
taken: BranchTarget, taken: BranchTarget,
not_taken: BranchTarget, not_taken: BranchTarget,
}, },
/// Lowered conditional branch: contains the original instruction, and a
/// flag indicating whether to invert the taken-condition or not. Only one
/// BranchTarget is retained, and the other is implicitly the next
/// instruction, given the final basic-block layout.
JmpCond {
cc: CC,
//inverted: bool, is this needed?
target: BranchTarget,
},
/// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two
/// actual machine instructions). Needed when the final block layout implies
/// that neither arm of a conditional branch targets the fallthrough block.
// Should never reach the emitter
JmpCondCompound {
cc: CC,
taken: BranchTarget,
not_taken: BranchTarget,
},
/// jmpq (reg mem) /// jmpq (reg mem)
JmpUnknown { target: RM }, JmpUnknown { target: RM },
} }
@@ -298,18 +282,6 @@ impl Inst {
} }
} }
pub(crate) fn jmp_cond(cc: CC, target: BranchTarget) -> Inst {
Inst::JmpCond { cc, target }
}
pub(crate) fn jmp_cond_compound(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
Inst::JmpCondCompound {
cc,
taken,
not_taken,
}
}
pub(crate) fn jmp_unknown(target: RM) -> Inst { pub(crate) fn jmp_unknown(target: RM) -> Inst {
Inst::JmpUnknown { target } Inst::JmpUnknown { target }
} }
@@ -485,13 +457,6 @@ impl ShowWithRRU for Inst {
not_taken.show_rru(mb_rru) not_taken.show_rru(mb_rru)
), ),
// //
Inst::JmpCond { cc, ref target } => format!(
"{} {}",
ljustify2("j".to_string(), cc.to_string()),
target.show_rru(None)
),
//
Inst::JmpCondCompound { .. } => "**JmpCondCompound**".to_string(),
Inst::JmpUnknown { target } => format!( Inst::JmpUnknown { target } => format!(
"{} *{}", "{} *{}",
ljustify("jmp".to_string()), ljustify("jmp".to_string()),
@@ -601,18 +566,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
taken: _, taken: _,
not_taken: _, not_taken: _,
} => {} } => {}
//
// ** JmpCond
//
// ** JmpCondCompound
//
//Inst::JmpUnknown { target } => { //Inst::JmpUnknown { target } => {
// target.get_regs_as_uses(collector); // target.get_regs_as_uses(collector);
//} //}
Inst::Nop { .. } Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
| Inst::JmpCond { .. }
| Inst::JmpCondCompound { .. }
| Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
} }
} }
@@ -767,18 +724,10 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
taken: _, taken: _,
not_taken: _, not_taken: _,
} => {} } => {}
//
// ** JmpCond
//
// ** JmpCondCompound
//
//Inst::JmpUnknown { target } => { //Inst::JmpUnknown { target } => {
// target.apply_map(mapper); // target.apply_map(mapper);
//} //}
Inst::Nop { .. } Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
| Inst::JmpCond { .. }
| Inst::JmpCondCompound { .. }
| Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
} }
} }
@@ -817,18 +766,12 @@ impl MachInst for Inst {
match self { match self {
// Interesting cases. // Interesting cases.
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret, &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()), &Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
&Self::JmpCondSymm { &Self::JmpCondSymm {
cc: _, cc: _,
taken, taken,
not_taken, not_taken,
} => MachTerminator::Cond( } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
taken.as_block_index().unwrap(),
not_taken.as_block_index().unwrap(),
),
&Self::JmpCond { .. } | &Self::JmpCondCompound { .. } => {
panic!("is_term() called after lowering branches");
}
// All other cases are boring. // All other cases are boring.
_ => MachTerminator::None, _ => MachTerminator::None,
} }
@@ -868,87 +811,95 @@ impl MachInst for Inst {
} }
} }
fn gen_jump(blockindex: BlockIndex) -> Inst { fn gen_jump(label: MachLabel) -> Inst {
Inst::jmp_known(BranchTarget::Block(blockindex)) Inst::jmp_known(BranchTarget::Label(label))
} }
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) { fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
// This is identical (modulo renaming) to the arm64 version. let mut ret = SmallVec::new();
match self { let is64 = value > 0xffff_ffff;
&mut Inst::JmpKnown { ref mut dest } => { ret.push(Inst::imm_r(is64, value, to_reg));
dest.map(block_target_map); ret
}
&mut Inst::JmpCondSymm {
cc: _,
ref mut taken,
ref mut not_taken,
} => {
taken.map(block_target_map);
not_taken.map(block_target_map);
}
&mut Inst::JmpCond { .. } | &mut Inst::JmpCondCompound { .. } => {
panic!("with_block_rewrites called after branch lowering!");
}
_ => {}
}
} }
fn with_fallthrough_block(&mut self, fallthrough: Option<BlockIndex>) { fn reg_universe(flags: &Flags) -> RealRegUniverse {
// This is identical (modulo renaming) to the arm64 version. create_reg_universe_systemv(flags)
match self {
&mut Inst::JmpCondSymm {
cc,
taken,
not_taken,
} => {
if taken.as_block_index() == fallthrough {
*self = Inst::jmp_cond(cc.invert(), not_taken);
} else if not_taken.as_block_index() == fallthrough {
*self = Inst::jmp_cond(cc, taken);
} else {
// We need a compound sequence (condbr / uncond-br).
*self = Inst::jmp_cond_compound(cc, taken, not_taken);
}
}
&mut Inst::JmpKnown { dest } => {
if dest.as_block_index() == fallthrough {
*self = Inst::nop(0);
}
}
_ => {}
}
} }
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) { fn worst_case_size() -> CodeOffset {
// This is identical (modulo renaming) to the arm64 version. 15
match self {
&mut Self::JmpCond {
cc: _,
ref mut target,
} => {
target.lower(targets, my_offset);
}
&mut Self::JmpCondCompound {
cc: _,
ref mut taken,
ref mut not_taken,
..
} => {
taken.lower(targets, my_offset);
not_taken.lower(targets, my_offset);
}
&mut Self::JmpKnown { ref mut dest } => {
dest.lower(targets, my_offset);
}
_ => {}
}
} }
type LabelUse = LabelUse;
} }
impl<O: MachSectionOutput> MachInstEmit<O> for Inst { impl MachInstEmit for Inst {
type State = (); type State = ();
fn emit(&self, sink: &mut O, _flags: &settings::Flags, _: &mut Self::State) { fn emit(&self, sink: &mut MachBuffer<Inst>, _flags: &settings::Flags, _: &mut Self::State) {
emit::emit(self, sink); emit::emit(self, sink);
} }
} }
/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum LabelUse {
/// A 32-bit offset from location of relocation itself, added to the
/// existing value at that location.
Rel32,
}
impl MachInstLabelUse for LabelUse {
const ALIGN: CodeOffset = 1;
fn max_pos_range(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0x7fff_ffff,
}
}
fn max_neg_range(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0x8000_0000,
}
}
fn patch_size(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 4,
}
}
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
match self {
LabelUse::Rel32 => {
let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
let value = i32::try_from(label_offset)
.unwrap()
.wrapping_sub(i32::try_from(use_offset).unwrap())
.wrapping_add(addend);
buffer.copy_from_slice(&value.to_le_bytes()[..]);
}
}
}
fn supports_veneer(self) -> bool {
match self {
LabelUse::Rel32 => false,
}
}
fn veneer_size(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0,
}
}
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
match self {
LabelUse::Rel32 => {
panic!("Veneer not supported for Rel32 label-use.");
}
}
}
}
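
Besides the branch cleanups, the inst/mod.rs changes above add the pieces the new framework expects from a backend: `gen_constant`, a `reg_universe` hook, a `worst_case_size` of 15 bytes (the architectural x86 instruction-length limit), and a `LabelUse` type describing the backend's internal relocations. For `Rel32`, `patch()` adds the distance from the use site to the bound label onto whatever addend the emitter already stored in the four displacement bytes. A standalone rendering of exactly that rule (illustrative function name, same arithmetic as the `patch()` above):

    // Rel32 patch rule: final value = (label_offset - use_offset) + addend,
    // where the addend is whatever the emitter wrote into the field.
    fn patch_rel32(buffer: &mut [u8; 4], use_offset: u32, label_offset: u32) {
        let addend = i32::from_le_bytes(*buffer);
        let value = (label_offset as i32)
            .wrapping_sub(use_offset as i32)
            .wrapping_add(addend);
        *buffer = value.to_le_bytes();
    }

    fn main() {
        // The emitter stored an addend of -5 (the jmp length) at offset 1;
        // the label was later bound at offset 64.
        let mut disp = (-5i32).to_le_bytes();
        patch_rel32(&mut disp, 1, 64);
        assert_eq!(i32::from_le_bytes(disp), 64 - 1 - 5);
    }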

View File

@@ -12,6 +12,7 @@ use crate::ir::{InstructionData, Opcode, Type};
use crate::machinst::lower::*; use crate::machinst::lower::*;
use crate::machinst::*; use crate::machinst::*;
use crate::result::CodegenResult;
use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*; use crate::isa::x64::inst::*;
@@ -94,6 +95,16 @@ fn intCC_to_x64_CC(cc: IntCC) -> CC {
} }
} }
fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
let inputs = ctx.get_input(iri, input);
ctx.use_input_reg(inputs);
inputs.reg
}
fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
ctx.get_output(iri, output)
}
//============================================================================= //=============================================================================
// Top-level instruction lowering entry point, for one instruction. // Top-level instruction lowering entry point, for one instruction.
@@ -114,7 +125,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
// Get exactly the bit pattern in 'w64' into the dest. No // Get exactly the bit pattern in 'w64' into the dest. No
// monkeying with sign extension etc. // monkeying with sign extension etc.
let dstIs64 = w64 > 0xFFFF_FFFF; let dstIs64 = w64 > 0xFFFF_FFFF;
let regD = ctx.output(iri, 0); let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::imm_r(dstIs64, w64, regD)); ctx.emit(Inst::imm_r(dstIs64, w64, regD));
} else { } else {
unimplemented!(); unimplemented!();
@@ -122,9 +133,9 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
} }
Opcode::Iadd | Opcode::Isub => { Opcode::Iadd | Opcode::Isub => {
let regD = ctx.output(iri, 0); let regD = output_to_reg(ctx, iri, 0);
let regL = ctx.input(iri, 0); let regL = input_to_reg(ctx, iri, 0);
let regR = ctx.input(iri, 1); let regR = input_to_reg(ctx, iri, 1);
let is64 = int_ty_to_is64(ty.unwrap()); let is64 = int_ty_to_is64(ty.unwrap());
let how = if op == Opcode::Iadd { let how = if op == Opcode::Iadd {
RMI_R_Op::Add RMI_R_Op::Add
@@ -139,9 +150,9 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
// TODO: implement imm shift value into insn // TODO: implement imm shift value into insn
let tySL = ctx.input_ty(iri, 0); let tySL = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
let regSL = ctx.input(iri, 0); let regSL = input_to_reg(ctx, iri, 0);
let regSR = ctx.input(iri, 1); let regSR = input_to_reg(ctx, iri, 1);
let regD = ctx.output(iri, 0); let regD = output_to_reg(ctx, iri, 0);
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) { if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
let how = match op { let how = match op {
Opcode::Ishl => ShiftKind::Left, Opcode::Ishl => ShiftKind::Left,
@@ -168,8 +179,8 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
let isZX = op == Opcode::Uextend; let isZX = op == Opcode::Uextend;
let tyS = ctx.input_ty(iri, 0); let tyS = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0); let tyD = ctx.output_ty(iri, 0);
let regS = ctx.input(iri, 0); let regS = input_to_reg(ctx, iri, 0);
let regD = ctx.output(iri, 0); let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::mov_r_r(true, regS, regD)); ctx.emit(Inst::mov_r_r(true, regS, regD));
match (tyS, tyD, isZX) { match (tyS, tyD, isZX) {
(types::I8, types::I64, false) => { (types::I8, types::I64, false) => {
@@ -182,7 +193,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
Opcode::FallthroughReturn | Opcode::Return => { Opcode::FallthroughReturn | Opcode::Return => {
for i in 0..ctx.num_inputs(iri) { for i in 0..ctx.num_inputs(iri) {
let src_reg = ctx.input(iri, i); let src_reg = input_to_reg(ctx, iri, i);
let retval_reg = ctx.retval(i); let retval_reg = ctx.retval(i);
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)); ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
} }
@@ -219,35 +230,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
panic!("ALU+imm and ALU+carry ops should not appear here!"); panic!("ALU+imm and ALU+carry ops should not appear here!");
} }
Opcode::X86Udivmodx
| Opcode::X86Sdivmodx
| Opcode::X86Umulx
| Opcode::X86Smulx
| Opcode::X86Cvtt2si
| Opcode::X86Fmin
| Opcode::X86Fmax
| Opcode::X86Push
| Opcode::X86Pop
| Opcode::X86Bsr
| Opcode::X86Bsf
| Opcode::X86Pshufd
| Opcode::X86Pshufb
| Opcode::X86Pextr
| Opcode::X86Pinsr
| Opcode::X86Insertps
| Opcode::X86Movsd
| Opcode::X86Movlhps
| Opcode::X86Psll
| Opcode::X86Psrl
| Opcode::X86Psra
| Opcode::X86Ptest
| Opcode::X86Pmaxs
| Opcode::X86Pmaxu
| Opcode::X86Pmins
| Opcode::X86Pminu => {
panic!("x86-specific opcode in supposedly arch-neutral IR!");
}
_ => unimplemented!("unimplemented lowering for opcode {:?}", op), _ => unimplemented!("unimplemented lowering for opcode {:?}", op),
} }
} }
@@ -258,17 +240,18 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
impl LowerBackend for X64Backend { impl LowerBackend for X64Backend {
type MInst = Inst; type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) { fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_insn_to_regs(ctx, ir_inst); lower_insn_to_regs(ctx, ir_inst);
Ok(())
} }
fn lower_branch_group<C: LowerCtx<I = Inst>>( fn lower_branch_group<C: LowerCtx<I = Inst>>(
&self, &self,
ctx: &mut C, ctx: &mut C,
branches: &[IRInst], branches: &[IRInst],
targets: &[BlockIndex], targets: &[MachLabel],
fallthrough: Option<BlockIndex>, fallthrough: Option<MachLabel>,
) { ) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a // A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an // conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch, // unconditional branch or fallthrough. Otherwise, if only one branch,
@@ -290,17 +273,17 @@ impl LowerBackend for X64Backend {
); );
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough); assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Block(targets[0]); let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 { let not_taken = match op1 {
Opcode::Jump => BranchTarget::Block(targets[1]), Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()), Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above. _ => unreachable!(), // assert above.
}; };
match op0 { match op0 {
Opcode::Brz | Opcode::Brnz => { Opcode::Brz | Opcode::Brnz => {
let tyS = ctx.input_ty(branches[0], 0); let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) { if is_int_ty(tyS) {
let rS = ctx.input(branches[0], 0); let rS = input_to_reg(ctx, branches[0], 0);
let cc = match op0 { let cc = match op0 {
Opcode::Brz => CC::Z, Opcode::Brz => CC::Z,
Opcode::Brnz => CC::NZ, Opcode::Brnz => CC::NZ,
@@ -316,8 +299,8 @@ impl LowerBackend for X64Backend {
Opcode::BrIcmp => { Opcode::BrIcmp => {
let tyS = ctx.input_ty(branches[0], 0); let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) { if is_int_ty(tyS) {
let rSL = ctx.input(branches[0], 0); let rSL = input_to_reg(ctx, branches[0], 0);
let rSR = ctx.input(branches[0], 1); let rSR = input_to_reg(ctx, branches[0], 1);
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0]))); let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
let sizeB = int_ty_to_sizeB(tyS); let sizeB = int_ty_to_sizeB(tyS);
// FIXME verify rSR vs rSL ordering // FIXME verify rSR vs rSL ordering
@@ -339,10 +322,10 @@ impl LowerBackend for X64Backend {
let op = ctx.data(branches[0]).opcode(); let op = ctx.data(branches[0]).opcode();
match op { match op {
Opcode::Jump => { Opcode::Jump => {
ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0]))); ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
} }
Opcode::Fallthrough => { Opcode::Fallthrough => {
ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0]))); ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
} }
Opcode::Trap => { Opcode::Trap => {
unimplemented = true; unimplemented = true;
@@ -354,5 +337,7 @@ impl LowerBackend for X64Backend {
if unimplemented { if unimplemented {
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches); unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
} }
Ok(())
} }
} }
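
In lower.rs, register lookups now go through two small helpers: `input_to_reg` fetches an input and marks it as needed in a register via `use_input_reg`, and `output_to_reg` simply asks the context for the output register. The toy sketch below (a stand-in `Ctx` struct, not the real `LowerCtx` trait) shows the shape of a lowering written against such helpers; the move-then-op sequence mirrors the two-address style x86 ALU lowerings generally use, not necessarily the exact instructions the real `Iadd` case emits.

    // Stand-in lowering context: register lookups go through one helper
    // per direction, and the lowering itself just emits.
    struct Ctx {
        code: Vec<String>,
    }

    impl Ctx {
        // Stands in for input_to_reg; the real helper also calls
        // use_input_reg(..) to mark the value as used in a register.
        fn input_reg(&mut self, _inst: u32, idx: usize) -> u32 {
            idx as u32 + 1
        }
        // Stands in for output_to_reg / ctx.get_output(..).
        fn output_reg(&mut self, _inst: u32, _idx: usize) -> u32 {
            0
        }
        fn emit(&mut self, text: String) {
            self.code.push(text);
        }
    }

    fn lower_iadd(ctx: &mut Ctx, inst: u32) {
        let dst = ctx.output_reg(inst, 0);
        let lhs = ctx.input_reg(inst, 0);
        let rhs = ctx.input_reg(inst, 1);
        // Two-address form: mov dst, lhs ; add dst, rhs
        ctx.emit(format!("mov r{}, r{}", dst, lhs));
        ctx.emit(format!("add r{}, r{}", dst, rhs));
    }

    fn main() {
        let mut ctx = Ctx { code: Vec::new() };
        lower_iadd(&mut ctx, 0);
        assert_eq!(ctx.code, vec!["mov r0, r1", "add r0, r2"]);
    }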

View File

@@ -52,7 +52,8 @@ impl MachBackend for X64Backend {
) -> CodegenResult<MachCompileResult> { ) -> CodegenResult<MachCompileResult> {
let flags = self.flags(); let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?; let vcode = self.compile_vcode(func, flags.clone())?;
let sections = vcode.emit(); let buffer = vcode.emit();
let buffer = buffer.finish();
let frame_size = vcode.frame_size(); let frame_size = vcode.frame_size();
let disasm = if want_disasm { let disasm = if want_disasm {
@@ -62,7 +63,7 @@ impl MachBackend for X64Backend {
}; };
Ok(MachCompileResult { Ok(MachCompileResult {
sections, buffer,
frame_size, frame_size,
disasm, disasm,
}) })
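
At the top level, `compile()` now receives a MachBuffer from `vcode.emit()` and seals it with `finish()` before storing it in the `MachCompileResult`; once finished, no further fixups or appends happen. A toy illustration of that two-phase hand-off (illustrative `Buffer`/`FinalizedBuffer` types, not the real ones, which also resolve any outstanding label fixups at `finish()` time):

    // Editable buffer during emission, immutable finalized buffer afterwards.
    struct Buffer {
        data: Vec<u8>,
    }
    struct FinalizedBuffer {
        data: Vec<u8>,
    }

    impl Buffer {
        fn new() -> Self {
            Buffer { data: Vec::new() }
        }
        fn put1(&mut self, b: u8) {
            self.data.push(b);
        }
        /// Consume the editable buffer; after this, no more appends or fixups.
        fn finish(self) -> FinalizedBuffer {
            FinalizedBuffer { data: self.data }
        }
    }

    impl FinalizedBuffer {
        fn total_size(&self) -> usize {
            self.data.len()
        }
    }

    fn main() {
        let mut buffer = Buffer::new(); // what vcode.emit() builds up
        buffer.put1(0xC3);              // ret
        let buffer = buffer.finish();   // sealed result kept in MachCompileResult
        assert_eq!(buffer.total_size(), 1);
    }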

View File

@@ -57,11 +57,11 @@ fn isa_constructor(
let isa_flags = settings::Flags::new(&shared_flags, builder); let isa_flags = settings::Flags::new(&shared_flags, builder);
if isa_flags.use_new_backend() { if isa_flags.use_new_backend() {
//#[cfg(not(feature = "x64"))] #[cfg(not(feature = "x64"))]
panic!("new backend x86 support not included by cargo features!"); panic!("new backend x86 support not included by cargo features!");
//#[cfg(feature = "x64")] #[cfg(feature = "x64")]
//super::x64::isa_builder(triple).finish(shared_flags) super::x64::isa_builder(triple).finish(shared_flags)
} else { } else {
Box::new(Isa { Box::new(Isa {
triple, triple,