diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index d193936a91..f7f033965f 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -77,8 +77,8 @@ mod riscv;
 #[cfg(feature = "x86")]
 mod x86;
 
-//#[cfg(feature = "x64")]
-//mod x64;
+#[cfg(feature = "x64")]
+mod x64;
 
 #[cfg(feature = "arm32")]
 mod arm32;
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index dbdd484fca..1e77dd91fa 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -5,7 +5,6 @@ use std::string::{String, ToString};
 
 use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
 
-use crate::binemit::CodeOffset;
 use crate::machinst::*;
 
 use super::regs::show_ireg_sized;
@@ -375,43 +374,27 @@ impl fmt::Debug for CC {
 /// from end of current instruction).
 #[derive(Clone, Copy, Debug)]
 pub enum BranchTarget {
-    /// An unresolved reference to a BlockIndex, as passed into
-    /// `lower_branch_group()`.
-    Block(BlockIndex),
+    /// An unresolved reference to a MachLabel.
+    Label(MachLabel),
 
-    /// A resolved reference to another instruction, after
-    /// `Inst::with_block_offsets()`. This offset is in bytes.
-    ResolvedOffset(BlockIndex, isize),
+    /// A resolved reference to another instruction, in bytes.
+    ResolvedOffset(isize),
 }
 
 impl ShowWithRRU for BranchTarget {
     fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
         match self {
-            BranchTarget::Block(bix) => format!("(Block {})", bix),
-            BranchTarget::ResolvedOffset(bix, offs) => format!("(Block {}, offset {})", bix, offs),
+            BranchTarget::Label(l) => format!("{:?}", l),
+            BranchTarget::ResolvedOffset(offs) => format!("(offset {})", offs),
         }
     }
 }
 
 impl BranchTarget {
-    /// Lower the branch target given offsets of each block.
-    pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
+    /// Get the label.
+    pub fn as_label(&self) -> Option<MachLabel> {
         match self {
-            &mut BranchTarget::Block(bix) => {
-                let bix = bix as usize;
-                assert!(bix < targets.len());
-                let block_offset_in_func = targets[bix];
-                let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
-                *self = BranchTarget::ResolvedOffset(bix as BlockIndex, branch_offset);
-            }
-            &mut BranchTarget::ResolvedOffset(..) => {}
-        }
-    }
-
-    /// Get the block index.
-    pub fn as_block_index(&self) -> Option<BlockIndex> {
-        match self {
-            &BranchTarget::Block(bix) => Some(bix),
+            &BranchTarget::Label(l) => Some(l),
             _ => None,
         }
     }
@@ -421,31 +404,17 @@ impl BranchTarget {
     /// byte of the target. It does not take into account the Intel-specific
     /// rule that a branch offset is encoded as relative to the start of the
    /// following instruction. That is a problem for the emitter to deal
-    /// with.
-    pub fn as_offset_i32(&self) -> Option<i32> {
+    /// with. If a label, returns zero.
+    pub fn as_offset32_or_zero(&self) -> i32 {
         match self {
-            &BranchTarget::ResolvedOffset(_, off) => {
+            &BranchTarget::ResolvedOffset(off) => {
                 // Leave a bit of slack so that the emitter is guaranteed to
                 // be able to add the length of the jump instruction encoding
                 // to this value and still have a value in signed-32 range.
-                if off >= -0x7FFF_FF00isize && off <= 0x7FFF_FF00isize {
-                    Some(off as i32)
-                } else {
-                    None
-                }
+                assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
+                off as i32
             }
-            _ => None,
-        }
-    }
-
-    /// Map the block index given a transform map.
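Editorial note (not part of the patch): the contract of `as_offset32_or_zero` above is that a still-unresolved `Label` contributes a zero displacement, which the emitter then registers as a label-use for the MachBuffer to patch once the label's offset is known. A minimal standalone sketch of that contract, with `u32` standing in for the real `MachLabel`:

    // Standalone sketch; `u32` stands in for the real MachLabel type.
    #[derive(Clone, Copy)]
    enum Target {
        Label(u32),
        ResolvedOffset(isize),
    }

    impl Target {
        fn as_offset32_or_zero(&self) -> i32 {
            match *self {
                // Unresolved: emit 0 now; the buffer patches in the real
                // displacement once the label's offset is known.
                Target::Label(_) => 0,
                Target::ResolvedOffset(off) => {
                    // Slack so the emitter can subtract the instruction
                    // length and stay in signed-32 range.
                    assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
                    off as i32
                }
            }
        }
    }

    fn main() {
        // jmp rel32 is 5 bytes; x86 displacements are relative to the *next*
        // instruction, hence the -5 adjustment the emitter applies.
        assert_eq!(Target::ResolvedOffset(100).as_offset32_or_zero() - 5, 95);
        assert_eq!(Target::Label(7).as_offset32_or_zero(), 0);
    }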
-    pub fn map(&mut self, block_index_map: &[BlockIndex]) {
-        match self {
-            &mut BranchTarget::Block(ref mut bix) => {
-                let n = block_index_map[*bix as usize];
-                *bix = n;
-            }
-            _ => panic!("BranchTarget::map() called on already-lowered BranchTarget!"),
+            _ => 0,
         }
     }
 }
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index 2b4d3e54d6..cd0bdbb5be 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -80,8 +80,8 @@ const F_PREFIX_66: u32 = 4;
 /// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
 /// REX prefix will normally never be redundant, since REX.W must be 1 to
 /// indicate a 64-bit operation.
-fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE<O: MachSectionOutput>(
-    sink: &mut O,
+fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
+    sink: &mut MachBuffer<Inst>,
     opcodes: u32,
     mut numOpcodes: usize,
     encG: u8,
@@ -199,8 +199,8 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
 /// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
 /// where the E operand is a register rather than memory. Hence it is much
 /// simpler.
-fn emit_REX_OPCODES_MODRM_encG_encE<O: MachSectionOutput>(
-    sink: &mut O,
+fn emit_REX_OPCODES_MODRM_encG_encE(
+    sink: &mut MachBuffer<Inst>,
     opcodes: u32,
     mut numOpcodes: usize,
     encG: u8,
@@ -240,8 +240,8 @@ fn emit_REX_OPCODES_MODRM_encG_encE(
 // These are merely wrappers for the above two functions that facilitate passing
 // actual `Reg`s rather than their encodings.
 
-fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE<O: MachSectionOutput>(
-    sink: &mut O,
+fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
+    sink: &mut MachBuffer<Inst>,
     opcodes: u32,
     numOpcodes: usize,
     regG: Reg,
@@ -253,8 +253,8 @@
     emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
 }
 
-fn emit_REX_OPCODES_MODRM_regG_regE<O: MachSectionOutput>(
-    sink: &mut O,
+fn emit_REX_OPCODES_MODRM_regG_regE(
+    sink: &mut MachBuffer<Inst>,
     opcodes: u32,
     numOpcodes: usize,
     regG: Reg,
@@ -268,7 +268,7 @@
 }
 
 /// Write a suitable number of bits from an imm64 to the sink.
-fn emit_simm<O: MachSectionOutput>(sink: &mut O, size: u8, simm32: u32) {
+fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
     match size {
         8 | 4 => sink.put4(simm32),
         2 => sink.put2(simm32 as u16),
@@ -329,7 +329,7 @@
 ///
 /// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
 ///   care?)
-pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) {
+pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
     match inst {
         Inst::Nop { len: 0 } => {}
 
         Inst::Alu_RMI_R {
@@ -808,55 +808,59 @@ pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) {
         }
 
         Inst::Ret {} => sink.put1(0xC3),
 
-        Inst::JmpKnown {
-            dest: BranchTarget::Block(..),
-        } => {
-            // Computation of block offsets/sizes.
-            sink.put1(0);
-            sink.put4(0);
-        }
-        Inst::JmpKnown {
-            dest: BranchTarget::ResolvedOffset(_bix, offset),
-        } if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
-            // And now for real
-            let mut offs_i32 = *offset as i32;
-            offs_i32 -= 5;
-            let offs_u32 = offs_i32 as u32;
+        Inst::JmpKnown { dest } => {
+            let disp = dest.as_offset32_or_zero() - 5;
+            let disp = disp as u32;
+            let br_start = sink.cur_offset();
             sink.put1(0xE9);
-            sink.put4(offs_u32);
+            let br_disp_off = sink.cur_offset();
+            sink.put4(disp);
+            let br_end = sink.cur_offset();
+            if let Some(l) = dest.as_label() {
+                sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
+                sink.add_uncond_branch(br_start, br_end, l);
+            }
         }
 
-        //
-        // ** Inst::JmpCondSymm XXXX should never happen
-        //
-        Inst::JmpCond {
-            cc: _,
-            target: BranchTarget::Block(..),
-        } => {
-            // This case occurs when we are computing block offsets / sizes,
-            // prior to lowering block-index targets to concrete-offset targets.
-            // Only the size matters, so let's emit 6 bytes, as below.
-            sink.put1(0);
-            sink.put1(0);
-            sink.put4(0);
-        }
-        Inst::JmpCond {
+        Inst::JmpCondSymm {
             cc,
-            target: BranchTarget::ResolvedOffset(_bix, offset),
-        } if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
+            taken,
+            not_taken,
+        } => {
+            // Conditional part.
+
             // This insn is 6 bytes long. Currently `offset` is relative to
             // the start of this insn, but the Intel encoding requires it to
             // be relative to the start of the next instruction. Hence the
             // adjustment.
-            let mut offs_i32 = *offset as i32;
-            offs_i32 -= 6;
-            let offs_u32 = offs_i32 as u32;
+            let taken_disp = taken.as_offset32_or_zero() - 6;
+            let taken_disp = taken_disp as u32;
+            let cond_start = sink.cur_offset();
             sink.put1(0x0F);
             sink.put1(0x80 + cc.get_enc());
-            sink.put4(offs_u32);
+            let cond_disp_off = sink.cur_offset();
+            sink.put4(taken_disp);
+            let cond_end = sink.cur_offset();
+            if let Some(l) = taken.as_label() {
+                sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
+                let inverted: [u8; 6] =
+                    [0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
+                sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
+            }
+
+            // Unconditional part.
+
+            let nt_disp = not_taken.as_offset32_or_zero() - 5;
+            let nt_disp = nt_disp as u32;
+            let uncond_start = sink.cur_offset();
+            sink.put1(0xE9);
+            let uncond_disp_off = sink.cur_offset();
+            sink.put4(nt_disp);
+            let uncond_end = sink.cur_offset();
+            if let Some(l) = not_taken.as_label() {
+                sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
+                sink.add_uncond_branch(uncond_start, uncond_end, l);
+            }
         }
 
-        //
-        // ** Inst::JmpCondCompound XXXX should never happen
-        //
         Inst::JmpUnknown { target } => {
             match target {
                 RM::R { reg } => {
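Editorial note (not part of the patch): in the `JmpCondSymm` arm above, the `inverted` byte sequence handed to `add_cond_branch` is itself a complete jcc with the opposite condition whose rel32 is 0xFFFFFFFA, i.e. -6 — a placeholder branch back to its own first byte. The MachBuffer swaps it in, and re-targets it, when the following unconditional jump turns out to be deletable. A small illustrative decoding of those six bytes (standalone, not from the patch; on x86 a condition code is inverted by flipping its low bit, e.g. 0x4 "Z" vs 0x5 "NZ"):

    // Illustrative decoding of the 6-byte inverted conditional branch.
    fn main() {
        let cc_enc: u8 = 0x4; // e.g. Z (equal); 0x0F 0x80+cc is the jcc rel32 opcode
        let inverted: [u8; 6] = [0x0F, 0x80 + (cc_enc ^ 1), 0xFA, 0xFF, 0xFF, 0xFF];
        // The displacement bytes are little-endian two's complement.
        let disp = i32::from_le_bytes([inverted[2], inverted[3], inverted[4], inverted[5]]);
        // -6 from the end of a 6-byte instruction is its own start: a
        // self-referential placeholder until the buffer patches it.
        assert_eq!(disp, -6);
        println!("inverted jcc displacement = {}", disp);
    }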
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index 7c833a47c9..654dcc6b33 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -2180,19 +2180,11 @@ fn test_x64_emit() {
     let actual_printing = insn.show_rru(Some(&rru));
     assert_eq!(expected_printing, actual_printing);
 
-    // Check the encoding is as expected.
-    let text_size = {
-        let mut code_sec = MachSectionSize::new(0);
-        insn.emit(&mut code_sec, &flags, &mut Default::default());
-        code_sec.size()
-    };
-
     let mut sink = test_utils::TestCodeSink::new();
-    let mut sections = MachSections::new();
-    let code_idx = sections.add_section(0, text_size);
-    let code_sec = sections.get_section(code_idx);
-    insn.emit(code_sec, &flags, &mut Default::default());
-    sections.emit(&mut sink);
+    let mut buffer = MachBuffer::new();
+    insn.emit(&mut buffer, &flags, &mut Default::default());
+    let buffer = buffer.finish();
+    buffer.emit(&mut sink);
     let actual_encoding = &sink.stringify();
     assert_eq!(expected_encoding, actual_encoding);
 }
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index d2f438fc9b..a42653915b 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -4,6 +4,8 @@
 #![allow(non_snake_case)]
 #![allow(non_camel_case_types)]
 
+use core::convert::TryFrom;
+use smallvec::SmallVec;
 use std::fmt;
 use std::string::{String, ToString};
 
@@ -16,6 +18,7 @@ use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
 use crate::ir::ExternalName;
 use crate::ir::Type;
 use crate::machinst::*;
+use crate::settings::Flags;
 use crate::{settings, CodegenError, CodegenResult};
 
 pub mod args;
@@ -25,7 +28,7 @@ mod emit_tests;
 pub mod regs;
 
 use args::*;
-use regs::show_ireg_sized;
+use regs::{create_reg_universe_systemv, show_ireg_sized};
 
 //=============================================================================
 // Instructions (top level): definition
@@ -136,34 +139,15 @@ pub(crate) enum Inst {
     JmpKnown { dest: BranchTarget },
 
     /// jcond cond target target
-    // Symmetrical two-way conditional branch.
-    // Should never reach the emitter.
+    /// Symmetrical two-way conditional branch.
+    /// Emitted as a compound sequence; the MachBuffer will shrink it
+    /// as appropriate.
     JmpCondSymm {
         cc: CC,
         taken: BranchTarget,
         not_taken: BranchTarget,
     },
 
-    /// Lowered conditional branch: contains the original instruction, and a
-    /// flag indicating whether to invert the taken-condition or not. Only one
-    /// BranchTarget is retained, and the other is implicitly the next
-    /// instruction, given the final basic-block layout.
-    JmpCond {
-        cc: CC,
-        //inverted: bool, is this needed?
-        target: BranchTarget,
-    },
-
-    /// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two
-    /// actual machine instructions). Needed when the final block layout implies
-    /// that neither arm of a conditional branch targets the fallthrough block.
-    // Should never reach the emitter
-    JmpCondCompound {
-        cc: CC,
-        taken: BranchTarget,
-        not_taken: BranchTarget,
-    },
-
     /// jmpq (reg mem)
     JmpUnknown { target: RM },
 }
@@ -298,18 +282,6 @@ impl Inst {
         }
     }
 
-    pub(crate) fn jmp_cond(cc: CC, target: BranchTarget) -> Inst {
-        Inst::JmpCond { cc, target }
-    }
-
-    pub(crate) fn jmp_cond_compound(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
-        Inst::JmpCondCompound {
-            cc,
-            taken,
-            not_taken,
-        }
-    }
-
     pub(crate) fn jmp_unknown(target: RM) -> Inst {
         Inst::JmpUnknown { target }
     }
} => "**JmpCondCompound**".to_string(), Inst::JmpUnknown { target } => format!( "{} *{}", ljustify("jmp".to_string()), @@ -601,18 +566,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { taken: _, not_taken: _, } => {} - // - // ** JmpCond - // - // ** JmpCondCompound - // //Inst::JmpUnknown { target } => { // target.get_regs_as_uses(collector); //} - Inst::Nop { .. } - | Inst::JmpCond { .. } - | Inst::JmpCondCompound { .. } - | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"), + Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"), } } @@ -767,18 +724,10 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) { taken: _, not_taken: _, } => {} - // - // ** JmpCond - // - // ** JmpCondCompound - // //Inst::JmpUnknown { target } => { // target.apply_map(mapper); //} - Inst::Nop { .. } - | Inst::JmpCond { .. } - | Inst::JmpCondCompound { .. } - | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"), + Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"), } } @@ -817,18 +766,12 @@ impl MachInst for Inst { match self { // Interesting cases. &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret, - &Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()), + &Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()), &Self::JmpCondSymm { cc: _, taken, not_taken, - } => MachTerminator::Cond( - taken.as_block_index().unwrap(), - not_taken.as_block_index().unwrap(), - ), - &Self::JmpCond { .. } | &Self::JmpCondCompound { .. } => { - panic!("is_term() called after lowering branches"); - } + } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()), // All other cases are boring. _ => MachTerminator::None, } @@ -868,87 +811,95 @@ impl MachInst for Inst { } } - fn gen_jump(blockindex: BlockIndex) -> Inst { - Inst::jmp_known(BranchTarget::Block(blockindex)) + fn gen_jump(label: MachLabel) -> Inst { + Inst::jmp_known(BranchTarget::Label(label)) } - fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) { - // This is identical (modulo renaming) to the arm64 version. - match self { - &mut Inst::JmpKnown { ref mut dest } => { - dest.map(block_target_map); - } - &mut Inst::JmpCondSymm { - cc: _, - ref mut taken, - ref mut not_taken, - } => { - taken.map(block_target_map); - not_taken.map(block_target_map); - } - &mut Inst::JmpCond { .. } | &mut Inst::JmpCondCompound { .. } => { - panic!("with_block_rewrites called after branch lowering!"); - } - _ => {} - } + fn gen_constant(to_reg: Writable, value: u64, _: Type) -> SmallVec<[Self; 4]> { + let mut ret = SmallVec::new(); + let is64 = value > 0xffff_ffff; + ret.push(Inst::imm_r(is64, value, to_reg)); + ret } - fn with_fallthrough_block(&mut self, fallthrough: Option) { - // This is identical (modulo renaming) to the arm64 version. - match self { - &mut Inst::JmpCondSymm { - cc, - taken, - not_taken, - } => { - if taken.as_block_index() == fallthrough { - *self = Inst::jmp_cond(cc.invert(), not_taken); - } else if not_taken.as_block_index() == fallthrough { - *self = Inst::jmp_cond(cc, taken); - } else { - // We need a compound sequence (condbr / uncond-br). 
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 23a190e578..f85d6166ac 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -12,6 +12,7 @@ use crate::ir::{InstructionData, Opcode, Type};
 
 use crate::machinst::lower::*;
 use crate::machinst::*;
+use crate::result::CodegenResult;
 
 use crate::isa::x64::inst::args::*;
 use crate::isa::x64::inst::*;
@@ -94,6 +95,16 @@ fn intCC_to_x64_CC(cc: IntCC) -> CC {
     }
 }
 
+fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
+    let inputs = ctx.get_input(iri, input);
+    ctx.use_input_reg(inputs);
+    inputs.reg
+}
+
+fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
+    ctx.get_output(iri, output)
+}
+
 //=============================================================================
 // Top-level instruction lowering entry point, for one instruction.
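Editorial note (not part of the patch): `input_to_reg` does two things — it fetches the instruction input and, via `use_input_reg`, records with the lowering context that the value is consumed from a register. A toy model of that calling pattern (all names hypothetical, much simplified from the real `LowerCtx`):

    // Toy model of the input_to_reg helper pattern (names hypothetical).
    struct ToyCtx {
        used_in_reg: Vec<bool>,
    }

    impl ToyCtx {
        // Stands in for ctx.get_input(..) + ctx.use_input_reg(..): returns an
        // input's register id and marks the value as register-resident.
        fn input_to_reg(&mut self, input: usize) -> usize {
            self.used_in_reg[input] = true;
            input
        }
    }

    fn main() {
        let mut ctx = ToyCtx { used_in_reg: vec![false; 2] };
        let l = ctx.input_to_reg(0);
        let r = ctx.input_to_reg(1);
        assert!(ctx.used_in_reg[l] && ctx.used_in_reg[r]);
    }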
@@ -114,7 +125,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
                 // Get exactly the bit pattern in 'w64' into the dest. No
                 // monkeying with sign extension etc.
                 let dstIs64 = w64 > 0xFFFF_FFFF;
-                let regD = ctx.output(iri, 0);
+                let regD = output_to_reg(ctx, iri, 0);
                 ctx.emit(Inst::imm_r(dstIs64, w64, regD));
             } else {
                 unimplemented!();
@@ -122,9 +133,9 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
         }
 
         Opcode::Iadd | Opcode::Isub => {
-            let regD = ctx.output(iri, 0);
-            let regL = ctx.input(iri, 0);
-            let regR = ctx.input(iri, 1);
+            let regD = output_to_reg(ctx, iri, 0);
+            let regL = input_to_reg(ctx, iri, 0);
+            let regR = input_to_reg(ctx, iri, 1);
             let is64 = int_ty_to_is64(ty.unwrap());
             let how = if op == Opcode::Iadd {
                 RMI_R_Op::Add
@@ -139,9 +150,9 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
             // TODO: implement imm shift value into insn
             let tySL = ctx.input_ty(iri, 0);
             let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
-            let regSL = ctx.input(iri, 0);
-            let regSR = ctx.input(iri, 1);
-            let regD = ctx.output(iri, 0);
+            let regSL = input_to_reg(ctx, iri, 0);
+            let regSR = input_to_reg(ctx, iri, 1);
+            let regD = output_to_reg(ctx, iri, 0);
             if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
                 let how = match op {
                     Opcode::Ishl => ShiftKind::Left,
@@ -168,8 +179,8 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
             let isZX = op == Opcode::Uextend;
             let tyS = ctx.input_ty(iri, 0);
             let tyD = ctx.output_ty(iri, 0);
-            let regS = ctx.input(iri, 0);
-            let regD = ctx.output(iri, 0);
+            let regS = input_to_reg(ctx, iri, 0);
+            let regD = output_to_reg(ctx, iri, 0);
             ctx.emit(Inst::mov_r_r(true, regS, regD));
             match (tyS, tyD, isZX) {
                 (types::I8, types::I64, false) => {
@@ -182,7 +193,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
 
         Opcode::FallthroughReturn | Opcode::Return => {
             for i in 0..ctx.num_inputs(iri) {
-                let src_reg = ctx.input(iri, i);
+                let src_reg = input_to_reg(ctx, iri, i);
                 let retval_reg = ctx.retval(i);
                 ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
             }
@@ -219,35 +230,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
             panic!("ALU+imm and ALU+carry ops should not appear here!");
         }
 
-        Opcode::X86Udivmodx
-        | Opcode::X86Sdivmodx
-        | Opcode::X86Umulx
-        | Opcode::X86Smulx
-        | Opcode::X86Cvtt2si
-        | Opcode::X86Fmin
-        | Opcode::X86Fmax
-        | Opcode::X86Push
-        | Opcode::X86Pop
-        | Opcode::X86Bsr
-        | Opcode::X86Bsf
-        | Opcode::X86Pshufd
-        | Opcode::X86Pshufb
-        | Opcode::X86Pextr
-        | Opcode::X86Pinsr
-        | Opcode::X86Insertps
-        | Opcode::X86Movsd
-        | Opcode::X86Movlhps
-        | Opcode::X86Psll
-        | Opcode::X86Psrl
-        | Opcode::X86Psra
-        | Opcode::X86Ptest
-        | Opcode::X86Pmaxs
-        | Opcode::X86Pmaxu
-        | Opcode::X86Pmins
-        | Opcode::X86Pminu => {
-            panic!("x86-specific opcode in supposedly arch-neutral IR!");
-        }
-
         _ => unimplemented!("unimplemented lowering for opcode {:?}", op),
     }
 }
@@ -258,17 +240,18 @@
 impl LowerBackend for X64Backend {
     type MInst = Inst;
 
-    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
+    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
         lower_insn_to_regs(ctx, ir_inst);
+        Ok(())
     }
 
     fn lower_branch_group<C: LowerCtx<I = Inst>>(
         &self,
         ctx: &mut C,
         branches: &[IRInst],
-        targets: &[BlockIndex],
-        fallthrough: Option<BlockIndex>,
-    ) {
+        targets: &[MachLabel],
+        fallthrough: Option<MachLabel>,
+    ) -> CodegenResult<()> {
         // A block should end with at most two branches. The first may be a
        // conditional branch; a conditional branch can be followed only by an
         // unconditional branch or fallthrough. Otherwise, if only one branch,
@@ -290,17 +273,17 @@ impl LowerBackend for X64Backend {
             );
             assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
 
-            let taken = BranchTarget::Block(targets[0]);
+            let taken = BranchTarget::Label(targets[0]);
             let not_taken = match op1 {
-                Opcode::Jump => BranchTarget::Block(targets[1]),
-                Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
+                Opcode::Jump => BranchTarget::Label(targets[1]),
+                Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
                 _ => unreachable!(), // assert above.
             };
 
             match op0 {
                 Opcode::Brz | Opcode::Brnz => {
                     let tyS = ctx.input_ty(branches[0], 0);
                     if is_int_ty(tyS) {
-                        let rS = ctx.input(branches[0], 0);
+                        let rS = input_to_reg(ctx, branches[0], 0);
                         let cc = match op0 {
                             Opcode::Brz => CC::Z,
                             Opcode::Brnz => CC::NZ,
@@ -316,8 +299,8 @@ impl LowerBackend for X64Backend {
                 Opcode::BrIcmp => {
                     let tyS = ctx.input_ty(branches[0], 0);
                     if is_int_ty(tyS) {
-                        let rSL = ctx.input(branches[0], 0);
-                        let rSR = ctx.input(branches[0], 1);
+                        let rSL = input_to_reg(ctx, branches[0], 0);
+                        let rSR = input_to_reg(ctx, branches[0], 1);
                         let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
                         let sizeB = int_ty_to_sizeB(tyS);
                         // FIXME verify rSR vs rSL ordering
@@ -339,10 +322,10 @@ impl LowerBackend for X64Backend {
             let op = ctx.data(branches[0]).opcode();
             match op {
                 Opcode::Jump => {
-                    ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
+                    ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
                 }
                 Opcode::Fallthrough => {
-                    ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
+                    ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
                 }
                 Opcode::Trap => {
                     unimplemented = true;
@@ -354,5 +337,7 @@ impl LowerBackend for X64Backend {
         if unimplemented {
             unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
         }
+
+        Ok(())
     }
 }
diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs
index e9b0998821..0e607dcfae 100644
--- a/cranelift/codegen/src/isa/x64/mod.rs
+++ b/cranelift/codegen/src/isa/x64/mod.rs
@@ -52,7 +52,8 @@ impl MachBackend for X64Backend {
     ) -> CodegenResult<MachCompileResult> {
         let flags = self.flags();
         let vcode = self.compile_vcode(func, flags.clone())?;
-        let sections = vcode.emit();
+        let buffer = vcode.emit();
+        let buffer = buffer.finish();
         let frame_size = vcode.frame_size();
 
         let disasm = if want_disasm {
@@ -62,7 +63,7 @@ impl MachBackend for X64Backend {
         };
 
         Ok(MachCompileResult {
-            sections,
+            buffer,
             frame_size,
             disasm,
         })
diff --git a/cranelift/codegen/src/isa/x86/mod.rs b/cranelift/codegen/src/isa/x86/mod.rs
index 0cd825b161..9386e60310 100644
--- a/cranelift/codegen/src/isa/x86/mod.rs
+++ b/cranelift/codegen/src/isa/x86/mod.rs
@@ -57,11 +57,11 @@ fn isa_constructor(
     let isa_flags = settings::Flags::new(&shared_flags, builder);
 
     if isa_flags.use_new_backend() {
-        //#[cfg(not(feature = "x64"))]
+        #[cfg(not(feature = "x64"))]
         panic!("new backend x86 support not included by cargo features!");
 
-        //#[cfg(feature = "x64")]
-        //super::x64::isa_builder(triple).finish(shared_flags)
+        #[cfg(feature = "x64")]
+        super::x64::isa_builder(triple).finish(shared_flags)
     } else {
         Box::new(Isa {
             triple,
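Editorial note (not part of the patch): with the gates above re-enabled, the new backend is reachable when cranelift-codegen is built with the `x64` cargo feature and the x86 ISA flag `use_new_backend` is set. A hedged sketch of selecting it from client code, assuming the standard builder APIs of this era (exact names and signatures may differ):

    // Hedged sketch of driving the isa_constructor() path shown above.
    use std::str::FromStr;
    use cranelift_codegen::isa;
    use cranelift_codegen::settings::{self, Configurable};
    use target_lexicon::Triple;

    fn main() {
        let shared_flags = settings::Flags::new(settings::builder());
        let triple = Triple::from_str("x86_64-unknown-linux-gnu").unwrap();
        // isa::lookup picks the x86 constructor; `use_new_backend` is the
        // ISA flag consulted in isa_constructor() above.
        let mut isa_builder = isa::lookup(triple).expect("x86 backend available");
        isa_builder
            .enable("use_new_backend")
            .expect("flag assumed to exist");
        let isa = isa_builder.finish(shared_flags);
        println!("selected ISA: {}", isa.name());
    }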