Rename the 'cretonne' crate to 'cretonne-codegen'.

This fixes the next part of #287.
2018-04-17 08:48:02 -07:00
parent 7767186dd0
commit 24fa169e1f
254 changed files with 265 additions and 264 deletions
--- a/lib/codegen/src/isa/x86/abi.rs
+++ b/lib/codegen/src/isa/x86/abi.rs
@@ -0,0 +1,371 @@
+//! x86 ABI implementation.
+
+use super::registers::{FPR, GPR, RU};
+use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
+use cursor::{Cursor, CursorPosition, EncCursor};
+use ir;
+use ir::immediates::Imm64;
+use ir::stackslot::{StackOffset, StackSize};
+use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder,
+         ValueLoc};
+use isa::{RegClass, RegUnit, TargetIsa};
+use regalloc::RegisterSet;
+use result;
+use settings as shared_settings;
+use stack_layout::layout_stack;
+use std::i32;
+
+/// Argument registers for x86-64
+static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
+
+/// Return value registers.
+static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];
+
+struct Args {
+    pointer_bytes: u32, // Pointer size in bytes; also the stride between stack slots.
+    pointer_bits: u16, // Pointer width in bits.
+    pointer_type: ir::Type, // Integer type that is `pointer_bits` wide.
+    gpr: &'static [RU], // GPRs available to non-float values, in assignment order.
+    gpr_used: usize, // How many entries of `gpr` have been assigned so far.
+    fpr_limit: usize, // Maximum number of FPR units that may be assigned.
+    fpr_used: usize, // How many FPR units have been assigned so far.
+    offset: u32, // Byte offset of the next stack-assigned value.
+    call_conv: CallConv, // Calling convention of the signature being legalized.
+}
+
+impl Args {
+    /// Create an assigner for a target whose pointers are `bits` wide.
+    ///
+    /// `gpr` lists the registers available to non-float values in assignment order, and
+    /// `fpr_limit` caps how many FPR units may be handed out. Nothing has been assigned yet, so
+    /// the usage counters and the stack offset start at zero.
+    ///
+    /// Panics if `ir::Type::int(bits)` returns `None`.
+    fn new(bits: u16, gpr: &'static [RU], fpr_limit: usize, call_conv: CallConv) -> Args {
+        Args {
+            pointer_bytes: u32::from(bits) / 8,
+            pointer_bits: bits,
+            pointer_type: ir::Type::int(bits).unwrap(),
+            gpr,
+            gpr_used: 0,
+            fpr_limit,
+            fpr_used: 0,
+            offset: 0,
+            call_conv,
+        }
+    }
+}
+
+impl ArgAssigner for Args {
+    /// Decide how the ABI value described by `arg` is passed.
+    ///
+    /// The result is either a conversion that has to be applied to the value first, or a final
+    /// register or stack location. Registers are taken from the pools tracked in `self`; when
+    /// none is available the value gets a pointer-sized stack slot at the next offset.
+    fn assign(&mut self, arg: &AbiParam) -> ArgAction {
+        let ty = arg.value_type;
+
+        // SIMD isn't supported yet, so every vector is broken down.
+        if ty.is_vector() {
+            return ValueConversion::VectorSplit.into();
+        }
+
+        let is_float = ty.is_float();
+
+        // Integers and booleans that don't fit in a register are broken down.
+        if !is_float && ty.bits() > self.pointer_bits {
+            return ValueConversion::IntSplit.into();
+        }
+
+        // Narrow integers are widened to pointer size when the ABI asks for an extension.
+        if ty.is_int() && ty.bits() < self.pointer_bits {
+            let widen = match arg.extension {
+                ArgumentExtension::None => None,
+                ArgumentExtension::Uext => Some(ValueConversion::Uext(self.pointer_type)),
+                ArgumentExtension::Sext => Some(ValueConversion::Sext(self.pointer_type)),
+            };
+            if let Some(conversion) = widen {
+                return conversion.into();
+            }
+        }
+
+        // Special-purpose arguments live in fixed registers under the SpiderWASM convention.
+        if ty.is_int() && self.call_conv == CallConv::SpiderWASM {
+            let fixed = match arg.purpose {
+                // This is SpiderMonkey's `WasmTlsReg`.
+                ArgumentPurpose::VMContext => Some(if self.pointer_bits == 64 {
+                    RU::r14
+                } else {
+                    RU::rsi
+                }),
+                // This is SpiderMonkey's `WasmTableCallSigReg`.
+                ArgumentPurpose::SignatureId => Some(RU::rbx),
+                _ => None,
+            };
+            if let Some(reg) = fixed {
+                return ArgumentLoc::Reg(reg as RegUnit).into();
+            }
+        }
+
+        // Floats go to the next free FPR, everything else to the next free GPR.
+        if is_float {
+            if self.fpr_used < self.fpr_limit {
+                let unit = FPR.unit(self.fpr_used);
+                self.fpr_used += 1;
+                return ArgumentLoc::Reg(unit).into();
+            }
+        } else if let Some(reg) = self.gpr.get(self.gpr_used) {
+            let unit = *reg as RegUnit;
+            self.gpr_used += 1;
+            return ArgumentLoc::Reg(unit).into();
+        }
+
+        // Out of registers: hand out the next pointer-sized stack slot.
+        let slot_offset = self.offset as i32;
+        self.offset += self.pointer_bytes;
+        debug_assert!(self.offset <= i32::MAX as u32);
+        ArgumentLoc::Stack(slot_offset).into()
+    }
+}
+
+/// Legalize `sig` by running `legalize_args` over its parameters and return values.
+///
+/// On 64-bit targets the parameter assigner may use the `ARG_GPRS` registers and up to 8 FPRs.
+/// On 32-bit targets it is given no argument GPRs and no FPRs. Return values use `RET_GPRS`
+/// and up to 2 FPRs in both modes.
+pub fn legalize_signature(sig: &mut ir::Signature, flags: &shared_settings::Flags, _current: bool) {
+    let call_conv = sig.call_conv;
+    let (bits, mut params) = if flags.is_64bit() {
+        (64, Args::new(64, &ARG_GPRS, 8, call_conv))
+    } else {
+        (32, Args::new(32, &[], 0, call_conv))
+    };
+    legalize_args(&mut sig.params, &mut params);
+
+    let mut returns = Args::new(bits, &RET_GPRS, 2, call_conv);
+    legalize_args(&mut sig.returns, &mut returns);
+}
+
+/// Pick the register class for a value of type `ty` appearing in a legalized signature.
+///
+/// Integers and booleans map to `GPR`; every other type maps to `FPR`.
+pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
+    if !(ty.is_int() || ty.is_bool()) {
+        return FPR;
+    }
+    GPR
+}
+
+/// Compute the set of registers the allocator may use for `func`.
+///
+/// Starts from `RegisterSet::new()` and removes `%rsp` and `%rbp`. On 32-bit targets, units 8
+/// through 15 of both the GPR and FPR classes are removed as well.
+pub fn allocatable_registers(_func: &ir::Function, flags: &shared_settings::Flags) -> RegisterSet {
+    let mut avail = RegisterSet::new();
+
+    // The stack pointer and frame pointer are never allocatable.
+    avail.take(GPR, RU::rsp as RegUnit);
+    avail.take(GPR, RU::rbp as RegUnit);
+
+    if flags.is_64bit() {
+        return avail;
+    }
+
+    // 32-bit arch only has 8 registers.
+    for idx in 8..16 {
+        avail.take(GPR, GPR.unit(idx));
+        avail.take(FPR, FPR.unit(idx));
+    }
+    avail
+}
+
+/// List the GPRs a callee must preserve, for 64-bit or 32-bit mode as selected by `flags`.
+fn callee_saved_gprs(flags: &shared_settings::Flags) -> &'static [RU] {
+    if !flags.is_64bit() {
+        return &[RU::rbx, RU::rsi, RU::rdi];
+    }
+    &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
+}
+
+/// Get the set of callee-saved GPRs that `func` actually uses.
+///
+/// A register unit counts as used when it is the location of a value in `func.locations` or
+/// the destination of a `regmove` or `regfill` instruction. The result is restricted to the
+/// registers returned by `callee_saved_gprs` for the current mode.
+fn callee_saved_gprs_used(flags: &shared_settings::Flags, func: &ir::Function) -> RegisterSet {
+    let mut all_callee_saved = RegisterSet::empty();
+    for reg in callee_saved_gprs(flags) {
+        all_callee_saved.free(GPR, *reg as RegUnit);
+    }
+
+    let mut used = RegisterSet::empty();
+    for value_loc in func.locations.values() {
+        // Note that `value_loc` here contains only a single unit of a potentially multi-unit
+        // register. We don't use registers that overlap each other in the x86 ISA, but in others
+        // we do. So this should not be blindly reused.
+        if let ValueLoc::Reg(ru) = *value_loc {
+            if !used.is_avail(GPR, ru) {
+                used.free(GPR, ru);
+            }
+        }
+    }
+
+    // regmove and regfill instructions may temporarily divert values into other registers,
+    // and these are not reflected in `func.locations`. Scan the function for such instructions
+    // and note which callee-saved registers they use.
+    //
+    // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
+    // to avoid this step.
+    for ebb in &func.layout {
+        for inst in func.layout.ebb_insts(ebb) {
+            match func.dfg[inst] {
+                ir::instructions::InstructionData::RegMove { dst, .. } |
+                ir::instructions::InstructionData::RegFill { dst, .. } => {
+                    if !used.is_avail(GPR, dst) {
+                        used.free(GPR, dst);
+                    }
+                }
+                _ => (),
+            }
+        }
+    }
+
+    // Keep only the registers that are both used and callee-saved.
+    used.intersect(&all_callee_saved);
+    used
+}
+
+/// Dispatch to the prologue/epilogue implementation for the calling convention of `func`.
+pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
+    let call_conv = func.signature.call_conv;
+    match call_conv {
+        CallConv::SpiderWASM => spiderwasm_prologue_epilogue(func, isa),
+        CallConv::SystemV => system_v_prologue_epilogue(func, isa),
+    }
+}
+
+/// Set up the stack frame of a SpiderWASM function.
+///
+/// No instructions are inserted here. An incoming-argument stack slot spanning
+/// `spiderwasm_prologue_words` machine words is pushed at a negative offset equal to its own
+/// size, and then the stack is laid out.
+pub fn spiderwasm_prologue_epilogue(
+    func: &mut ir::Function,
+    isa: &TargetIsa,
+) -> result::CtonResult {
+    let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
+    let prologue_bytes = StackSize::from(isa.flags().spiderwasm_prologue_words()) * word_size;
+
+    let mut prologue_slot =
+        ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, prologue_bytes);
+    prologue_slot.offset = Some(-(prologue_bytes as StackOffset));
+    func.stack_slots.push(prologue_slot);
+
+    // Spiderwasm on 32-bit x86 always aligns its stack pointer to 16 bytes.
+    let stack_align = 16;
+    layout_stack(&mut func.stack_slots, stack_align)?;
+    Ok(())
+}
+
+/// Set up the frame of a System V function and insert its prologue and epilogues.
+///
+/// Reserves an incoming-argument stack slot covering the return address, the saved frame
+/// pointer and every callee-saved register in use, then lays out the stack. The frame pointer
+/// and the callee-saved registers are appended to the signature as special-purpose parameters
+/// and return values before the prologue and epilogue instructions are inserted.
+pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
+    // The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
+    // newer versions use a 16-byte aligned stack pointer.
+    let stack_align = 16;
+    let (word_bytes, csr_type): (usize, _) = if isa.flags().is_64bit() {
+        (8, ir::types::I64)
+    } else {
+        (4, ir::types::I32)
+    };
+
+    let csrs = callee_saved_gprs_used(isa.flags(), func);
+
+    // Reserved area: return address + frame pointer + all callee-saved registers.
+    //
+    // The `call` instruction pushes the return address implicitly. The prologue pushes the
+    // others explicitly and then adjusts the stack pointer to make room for the rest of the
+    // frame.
+    let saved_words = csrs.iter(GPR).len() + 2;
+    let csr_stack_size = (saved_words * word_bytes) as i32;
+    func.create_stack_slot(ir::StackSlotData {
+        kind: ir::StackSlotKind::IncomingArg,
+        size: csr_stack_size as u32,
+        offset: Some(-csr_stack_size),
+    });
+
+    let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
+    let local_stack_size = i64::from(total_stack_size - csr_stack_size);
+
+    // Thread the frame pointer through the signature, both as a parameter and as a return value.
+    let fp_arg = ir::AbiParam::special_reg(
+        csr_type,
+        ir::ArgumentPurpose::FramePointer,
+        RU::rbp as RegUnit,
+    );
+    func.signature.params.push(fp_arg);
+    func.signature.returns.push(fp_arg);
+
+    // Do the same for every callee-saved register in use.
+    for unit in csrs.iter(GPR) {
+        let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, unit);
+        func.signature.params.push(csr_arg);
+        func.signature.returns.push(csr_arg);
+    }
+
+    // The prologue goes at the first insertion point of the entry block.
+    let entry_ebb = func.layout.entry_block().expect("missing entry block");
+    let mut prologue_pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
+    insert_system_v_prologue(&mut prologue_pos, local_stack_size, csr_type, &csrs);
+
+    // Restart from nowhere so the epilogue pass visits every EBB.
+    let mut epilogue_pos = prologue_pos.at_position(CursorPosition::Nowhere);
+    insert_system_v_epilogues(&mut epilogue_pos, local_stack_size, csr_type, &csrs);
+
+    Ok(())
+}
+
+/// Insert the prologue at `pos`: push the frame pointer and CSRs, then allocate the frame.
+fn insert_system_v_prologue(
+    pos: &mut EncCursor,
+    stack_size: i64,
+    csr_type: ir::types::Type,
+    csrs: &RegisterSet,
+) {
+    // The caller's frame pointer enters as an extra EBB parameter located in %rbp.
+    let ebb = pos.current_ebb().expect("missing ebb under cursor");
+    let fp = pos.func.dfg.append_ebb_param(ebb, csr_type);
+    pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
+
+    pos.ins().x86_push(fp);
+    pos.ins().copy_special(
+        RU::rsp as RegUnit,
+        RU::rbp as RegUnit,
+    );
+
+    for reg in csrs.iter(GPR) {
+        // Each used callee-saved register also enters as an extra EBB parameter...
+        let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type);
+
+        // ...located in the register itself...
+        pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
+
+        // ...and is pushed onto the stack right away.
+        pos.ins().x86_push(csr_arg);
+    }
+
+    if stack_size > 0 {
+        pos.ins().adjust_sp_imm(Imm64::new(-stack_size)); // Negative: the stack grows down.
+    }
+}
+
+/// Walk every EBB and insert an epilogue in front of each one that ends in a `return`.
+fn insert_system_v_epilogues(
+    pos: &mut EncCursor,
+    stack_size: i64,
+    csr_type: ir::types::Type,
+    csrs: &RegisterSet,
+) {
+    while let Some(ebb) = pos.next_ebb() {
+        // Only the final instruction of each EBB is inspected.
+        pos.goto_last_inst(ebb);
+        let last = match pos.current_inst() {
+            Some(inst) => inst,
+            None => continue,
+        };
+        if pos.func.dfg[last].opcode().is_return() {
+            insert_system_v_epilogue(last, stack_size, pos, csr_type, csrs);
+        }
+    }
+}
+
+/// Insert an epilogue in front of the `return` instruction `inst` that `pos` is positioned at.
+fn insert_system_v_epilogue(
+    inst: ir::Inst,
+    stack_size: i64,
+    pos: &mut EncCursor,
+    csr_type: ir::types::Type,
+    csrs: &RegisterSet,
+) {
+    if stack_size > 0 {
+        pos.ins().adjust_sp_imm(Imm64::new(stack_size));
+    }
+
+    // Pop the frame pointer and the callee-saved registers. The cursor steps back onto each new
+    // pop so the next one is inserted in front of it, which mirrors the prologue's push order.
+    let fp_ret = pos.ins().x86_pop(csr_type);
+    pos.prev_inst();
+
+    pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
+    pos.func.dfg.append_inst_arg(inst, fp_ret);
+
+    for reg in csrs.iter(GPR) {
+        let csr_ret = pos.ins().x86_pop(csr_type);
+        pos.prev_inst();
+
+        pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
+        pos.func.dfg.append_inst_arg(inst, csr_ret);
+    }
+}
--- a/lib/codegen/src/isa/x86/binemit.rs
+++ b/lib/codegen/src/isa/x86/binemit.rs
@@ -0,0 +1,300 @@
+//! Emitting binary x86 machine code.
+
+use super::registers::RU;
+use binemit::{bad_encoding, CodeSink, Reloc};
+use ir::condcodes::{CondCode, FloatCC, IntCC};
+use ir::{Ebb, Function, Inst, InstructionData, Opcode, TrapCode};
+use isa::{RegUnit, StackBase, StackBaseMask, StackRef};
+use regalloc::RegDiversions;
+
+include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
+
+// Map a stack base to the register unit that holds it.
+//
+// `StackBase::Zone` is not implemented.
+fn stk_base(base: StackBase) -> RegUnit {
+    match base {
+        StackBase::SP => RU::rsp as RegUnit,
+        StackBase::FP => RU::rbp as RegUnit,
+        StackBase::Zone => unimplemented!(),
+    }
+}
+
+// Mandatory prefix bytes for Mp* opcodes.
+const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
+
+// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
+const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
+
+// A REX prefix with no bits set: 0b0100WRXB.
+const BASE_REX: u8 = 0b0100_0000;
+
+// Build a REX prefix for an instruction with a single register operand.
+//
+// REX.B is set to bit 3 of `reg_b`. This covers instructions that encode a register in the low
+// 3 bits of the opcode as well as instructions that use the ModR/M `reg` field for something
+// else.
+fn rex1(reg_b: RegUnit) -> u8 {
+    BASE_REX | (((reg_b >> 3) & 1) as u8)
+}
+
+// Build a REX prefix for an instruction with two register operands:
+//
+// REX.B = bit 3 of the r/m register, or of the SIB base register when a SIB byte is present.
+// REX.R = bit 3 of the reg register.
+fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
+    let rex_b = ((rm >> 3) & 1) as u8;
+    let rex_r = ((reg >> 3) & 1) as u8;
+    BASE_REX | (rex_r << 2) | rex_b
+}
+
+// Emit a REX prefix byte.
+//
+// `rex` already carries the R, X, and B bits computed by `rex1` / `rex2`. The W bit is taken
+// from bit 15 of the encoding `bits`.
+fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(rex & 0xf8, BASE_REX);
+    let rex_w: u8 = if bits & 0x8000 != 0 { 0b1000 } else { 0 };
+    sink.put1(rex | rex_w);
+}
+
+// Emit a one-byte opcode; no REX prefix is written.
+fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
+    let opcode = bits as u8;
+    sink.put1(opcode);
+}
+
+// Emit a single-byte opcode with REX prefix.
+//
+// The REX byte is written first, with its W bit taken from `bits`, followed by the opcode in
+// the low byte of `bits`.
+fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
+    rex_prefix(bits, rex, sink);
+    sink.put1(bits as u8);
+}
+
+// Emit a two-byte opcode (0F XX); no REX prefix is written.
+fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
+    let opcode = bits as u8;
+    sink.put1(0x0f);
+    sink.put1(opcode);
+}
+
+// Emit a REX prefix followed by a two-byte opcode (0F XX).
+fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
+    let opcode = bits as u8;
+    rex_prefix(bits, rex, sink);
+    sink.put1(0x0f);
+    sink.put1(opcode);
+}
+
+// Emit a mandatory prefix byte followed by a one-byte opcode; no REX prefix is written.
+//
+// Bits 8-9 of `bits` select the prefix from `PREFIX` (1-based).
+fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
+    let prefix_sel = (bits >> 8) & 3;
+    let prefix = PREFIX[(prefix_sel - 1) as usize];
+    sink.put1(prefix);
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
+    sink.put1(bits as u8);
+}
+
+// Emit single-byte opcode with mandatory prefix and REX.
+//
+// Bits 8-9 of `bits` select the mandatory prefix from `PREFIX` (1-based). The prefix is
+// written first, then the REX byte, then the opcode in the low byte of `bits`.
+fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*");
+    let pp = (bits >> 8) & 3;
+    sink.put1(PREFIX[(pp - 1) as usize]);
+    rex_prefix(bits, rex, sink);
+    sink.put1(bits as u8);
+}
+
+// Emit a mandatory prefix byte followed by a two-byte opcode (0F XX); no REX prefix is written.
+//
+// Bits 8-9 of `bits` select the prefix from `PREFIX` (1-based).
+fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
+    let prefix_sel = (bits >> 8) & 3;
+    let prefix = PREFIX[(prefix_sel - 1) as usize];
+    sink.put1(prefix);
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
+    sink.put1(0x0f);
+    sink.put1(bits as u8);
+}
+
+// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
+//
+// Bits 8-9 of `bits` select the mandatory prefix from `PREFIX` (1-based). The prefix is
+// written first, then the REX byte, then 0F and the opcode in the low byte of `bits`.
+fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*");
+    let pp = (bits >> 8) & 3;
+    sink.put1(PREFIX[(pp - 1) as usize]);
+    rex_prefix(bits, rex, sink);
+    sink.put1(0x0f);
+    sink.put1(bits as u8);
+}
+
+// Emit a mandatory prefix byte followed by a three-byte opcode (0F 3[8A] XX); no REX prefix is
+// written.
+//
+// Bits 8-9 of `bits` select the prefix from `PREFIX` (1-based); bits 10-11 select the second
+// opcode byte from `OP3_BYTE2` (offset by 2).
+fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
+    let prefix_sel = (bits >> 8) & 3;
+    let prefix = PREFIX[(prefix_sel - 1) as usize];
+    sink.put1(prefix);
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
+    let map_sel = (bits >> 10) & 3;
+    sink.put1(0x0f);
+    let second_byte = OP3_BYTE2[(map_sel - 2) as usize];
+    sink.put1(second_byte);
+    sink.put1(bits as u8);
+}
+
+// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX.
+//
+// Bits 8-9 of `bits` select the mandatory prefix from `PREFIX` (1-based); bits 10-11 select
+// the second opcode byte from `OP3_BYTE2` (offset by 2). The prefix is written first, then the
+// REX byte, then the three opcode bytes.
+fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
+    let pp = (bits >> 8) & 3;
+    sink.put1(PREFIX[(pp - 1) as usize]);
+    rex_prefix(bits, rex, sink);
+    let mm = (bits >> 10) & 3;
+    sink.put1(0x0f);
+    sink.put1(OP3_BYTE2[(mm - 2) as usize]);
+    sink.put1(bits as u8);
+}
+
+/// Emit a mode 11 ModR/M byte, used when both operands are registers.
+fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+    let reg_field = (reg as u8 & 7) << 3;
+    let rm_field = rm as u8 & 7;
+    sink.put1(0b1100_0000 | reg_field | rm_field);
+}
+
+/// Emit a mode 11 ModR/M byte whose `reg` field is part of the opcode.
+///
+/// The `reg` field is taken from bits 12-14 of the encoding `bits`.
+fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
+    let reg_field = ((bits >> 12) as u8 & 7) << 3;
+    let rm_field = rm as u8 & 7;
+    sink.put1(0b1100_0000 | reg_field | rm_field);
+}
+
+/// Emit a mode 00 ModR/M byte: register-indirect addressing with no offset.
+///
+/// Registers %rsp and %rbp are invalid for `rm`: %rsp indicates a SIB byte, and %rbp indicates
+/// an absolute immediate 32-bit address.
+fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+    let reg_field = (reg as u8 & 7) << 3;
+    let rm_field = rm as u8 & 7;
+    sink.put1(0b0000_0000 | reg_field | rm_field);
+}
+
+/// Emit a mode 00 ModR/M byte that selects a rip-relative displacement in 64-bit mode.
+///
+/// The effective address is the displacement added to the 64-bit rip of the next instruction.
+/// See the Intel software developer's manual, section 2.2.1.6.
+fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    // r/m = 0b101 with mode 00 is the rip-relative form.
+    let rip_relative_rm = 0b101;
+    modrm_rm(rip_relative_rm, reg, sink)
+}
+
+/// Emit a mode 01 ModR/M byte: register-indirect addressing with an 8-bit displacement.
+///
+/// Register %rsp is invalid for `rm`; it indicates the presence of a SIB byte.
+fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+    let reg_field = (reg as u8 & 7) << 3;
+    let rm_field = rm as u8 & 7;
+    sink.put1(0b0100_0000 | reg_field | rm_field);
+}
+
+/// Emit a mode 10 ModR/M byte: register-indirect addressing with a 32-bit displacement.
+///
+/// Register %rsp is invalid for `rm`; it indicates the presence of a SIB byte.
+fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+    let reg_field = (reg as u8 & 7) << 3;
+    let rm_field = rm as u8 & 7;
+    sink.put1(0b1000_0000 | reg_field | rm_field);
+}
+
+/// Emit a mode 10 ModR/M byte whose `rm` field announces a following SIB byte.
+fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    // r/m = 0b100 means "SIB byte present".
+    let sib_follows_rm = 0b100;
+    modrm_disp32(sib_follows_rm, reg, sink);
+}
+
+/// Emit a SIB byte that has a base register but no scale or index.
+fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
+    // SIB layout is SS_III_BBB; index 0b100 means "no index".
+    let base_field = base as u8 & 7;
+    sink.put1(0b00_100_000 | base_field);
+}
+
+/// Map an integer condition code to the low 4 bits of an x86 condition opcode.
+///
+/// The returned value is added to one of these base opcodes:
+///
+/// - `0x70`: Short conditional branch.
+/// - `0x0f 0x80`: Long conditional branch.
+/// - `0x0f 0x90`: SetCC.
+///
+/// The remaining encodings are not produced here: 0x0/0x1 (overflow / !overflow), 0x8/0x9
+/// (sign / !sign) and 0xa/0xb (parity even / parity odd).
+fn icc2opc(cond: IntCC) -> u16 {
+    match cond {
+        // Equality.
+        IntCC::Equal => 0x4,
+        IntCC::NotEqual => 0x5,
+        // Unsigned comparisons.
+        IntCC::UnsignedLessThan => 0x2,
+        IntCC::UnsignedGreaterThanOrEqual => 0x3,
+        IntCC::UnsignedLessThanOrEqual => 0x6,
+        IntCC::UnsignedGreaterThan => 0x7,
+        // Signed comparisons.
+        IntCC::SignedLessThan => 0xc,
+        IntCC::SignedGreaterThanOrEqual => 0xd,
+        IntCC::SignedLessThanOrEqual => 0xe,
+        IntCC::SignedGreaterThan => 0xf,
+    }
+}
+
+/// Get the low 4 bits of an opcode for a floating point condition code.
+///
+/// The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
+///
+///    ZPC OSA
+/// UN 111 000
+/// GT 000 000
+/// LT 001 000
+/// EQ 100 000
+///
+/// Not all floating point condition codes are supported; the unsupported ones cause a panic.
+fn fcc2opc(cond: FloatCC) -> u16 {
+    use ir::condcodes::FloatCC::*;
+    match cond {
+        Ordered                    => 0xb, // EQ|LT|GT => *np (P=0)
+        Unordered                  => 0xa, // UN       => *p  (P=1)
+        OrderedNotEqual            => 0x5, // LT|GT    => *ne (Z=0),
+        UnorderedOrEqual           => 0x4, // UN|EQ    => *e  (Z=1)
+        GreaterThan                => 0x7, // GT       => *a  (C=0&Z=0)
+        GreaterThanOrEqual         => 0x3, // GT|EQ    => *ae (C=0)
+        UnorderedOrLessThan        => 0x2, // UN|LT    => *b  (C=1)
+        UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
+        Equal |                            // EQ
+        NotEqual |                         // UN|LT|GT
+        LessThan |                         // LT
+        LessThanOrEqual |                  // LT|EQ
+        UnorderedOrGreaterThan |           // UN|GT
+        UnorderedOrGreaterThanOrEqual      // UN|GT|EQ
+        => panic!("{} not supported", cond),
+    }
+}
+
+/// Emit a one-byte branch displacement to `destination`.
+///
+/// The displacement is the offset of `destination` minus `sink.offset() + 1`, computed with
+/// wrapping arithmetic and truncated to its low byte.
+fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
+    let target = func.offsets[destination];
+    let after_disp = sink.offset() + 1;
+    sink.put1(target.wrapping_sub(after_disp) as u8);
+}
+
+/// Emit a four-byte branch displacement to `destination`.
+fn disp4<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
+    let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
+    sink.put4(delta);
+}
--- a/lib/codegen/src/isa/x86/enc_tables.rs
+++ b/lib/codegen/src/isa/x86/enc_tables.rs
@@ -0,0 +1,509 @@
+//! Encoding tables for x86 ISAs.
+
+use super::registers::*;
+use bitset::BitSet;
+use cursor::{Cursor, FuncCursor};
+use flowgraph::ControlFlowGraph;
+use ir::condcodes::IntCC;
+use ir::{self, InstBuilder};
+use isa;
+use isa::constraints::*;
+use isa::enc_tables::*;
+use isa::encoding::RecipeSizing;
+use predicates;
+
+include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
+include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
+
+/// Expand `sdiv`/`srem` using `x86_sdivmodx`, special-casing a -1 divisor where necessary.
+fn expand_sdivrem(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    isa: &isa::TargetIsa,
+) {
+    let (x, y, is_srem) = match func.dfg[inst] {
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Sdiv,
+            args,
+        } => (args[0], args[1], false),
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Srem,
+            args,
+        } => (args[0], args[1], true),
+        _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
+    };
+    let avoid_div_traps = isa.flags().avoid_div_traps();
+    let old_ebb = func.layout.pp_ebb(inst);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+    pos.func.dfg.clear_results(inst);
+
+    // With native traps allowed, `sdiv` needs no branches; `srem` still does (x % -1 = 0).
+    if !avoid_div_traps && !is_srem {
+        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+        pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
+        pos.remove_inst();
+        return;
+    }
+
+    // EBB handling the -1 divisor case.
+    let minus_one = pos.func.dfg.make_ebb();
+
+    // Final EBB with one argument representing the final result value.
+    let done = pos.func.dfg.make_ebb();
+
+    // Move the `inst` result value onto the `done` EBB.
+    pos.func.dfg.attach_ebb_param(done, result);
+
+    // Start by checking for a -1 divisor which needs to be handled specially.
+    let is_m1 = pos.ins().ifcmp_imm(y, -1);
+    pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
+
+    // Put in an explicit division-by-zero trap if the environment requires it.
+    if avoid_div_traps {
+        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
+    }
+
+    // The divisor isn't -1 here, so `x86_sdivmodx` can't hit the INT_MIN / -1 overflow case.
+    // It will still trap on division by zero.
+    let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+    let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
+    let divres = if is_srem { rem } else { quot };
+    pos.ins().jump(done, &[divres]);
+
+    // Now deal with the -1 divisor case.
+    pos.insert_ebb(minus_one);
+    let m1_result = if is_srem {
+        // x % -1 = 0.
+        pos.ins().iconst(ty, 0)
+    } else {
+        // Explicitly check for overflow: Trap when x == INT_MIN.
+        debug_assert!(avoid_div_traps, "Native trapping divide handled above");
+        let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
+        pos.ins().trapif(
+            IntCC::Equal,
+            f,
+            ir::TrapCode::IntegerOverflow,
+        );
+        // x / -1 = -x.
+        pos.ins().irsub_imm(x, 0)
+    };
+
+    // Recycle the original instruction as a jump.
+    pos.func.dfg.replace(inst).jump(done, &[m1_result]);
+
+    // Finally insert a label for the completion.
+    pos.next_inst();
+    pos.insert_ebb(done);
+
+    cfg.recompute_ebb(pos.func, old_ebb);
+    cfg.recompute_ebb(pos.func, minus_one);
+    cfg.recompute_ebb(pos.func, done);
+}
+
+/// Expand `udiv` and `urem` into a single `x86_udivmodx` instruction.
+///
+/// The `xhi` operand is a zero constant. When the `avoid_div_traps` flag is set, an explicit
+/// `trapz` on the divisor is inserted first. The original result value is reused as the first
+/// result of the new instruction for `udiv` and as the second result for `urem`.
+fn expand_udivrem(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    _cfg: &mut ControlFlowGraph,
+    isa: &isa::TargetIsa,
+) {
+    let (operands, is_urem) = match func.dfg[inst] {
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Udiv,
+            args,
+        } => (args, false),
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Urem,
+            args,
+        } => (args, true),
+        _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
+    };
+    let (dividend, divisor) = (operands[0], operands[1]);
+    let explicit_zero_check = isa.flags().avoid_div_traps();
+    let old_result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(old_result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+    pos.func.dfg.clear_results(inst);
+
+    // Put in an explicit division-by-zero trap if the environment requires it.
+    if explicit_zero_check {
+        pos.ins().trapz(divisor, ir::TrapCode::IntegerDivisionByZero);
+    }
+
+    // Now it is safe to execute the `x86_udivmodx` instruction.
+    let zero_hi = pos.ins().iconst(ty, 0);
+
+    // Slot 0 is reused for `udiv`, slot 1 for `urem`.
+    let mut result_slots = [None, None];
+    result_slots[is_urem as usize] = Some(old_result);
+    pos.ins().with_results(result_slots).x86_udivmodx(
+        dividend,
+        zero_hi,
+        divisor,
+    );
+    pos.remove_inst();
+}
+
+/// Expand `fmin`/`fmax` into `x86_fmin`/`x86_fmax` for the ordered, not-equal case, with
+/// separate code paths for operands that compare equal or unordered (NaN).
+fn expand_minmax(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &isa::TargetIsa,
+) {
+    use ir::condcodes::FloatCC;
+
+    let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Fmin,
+            args,
+        } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor), // OR: -0.0 wins.
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Fmax,
+            args,
+        } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band), // AND: 0.0 wins.
+        _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
+    };
+    let old_ebb = func.layout.pp_ebb(inst);
+
+    // We need to handle the following conditions, depending on how x and y compare:
+    //
+    // 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
+    // 2. EQ: We need to use `bitwise_opc` to make sure that
+    //    fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
+    // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
+
+    // EBB handling case 3) where one operand is NaN.
+    let uno_ebb = func.dfg.make_ebb();
+
+    // EBB that handles the unordered or equal cases 2) and 3).
+    let ueq_ebb = func.dfg.make_ebb();
+
+    // Final EBB with one argument representing the final result value.
+    let done = func.dfg.make_ebb();
+
+    // The basic blocks are laid out to minimize branching for the common cases:
+    //
+    // 1) One branch not taken, one jump.
+    // 2) One branch taken.
+    // 3) Two branches taken, one jump.
+
+    // Move the `inst` result value onto the `done` EBB.
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+    func.dfg.clear_results(inst);
+    func.dfg.attach_ebb_param(done, result);
+
+    // Test for case 1) ordered and not equal.
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+    let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
+    pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);
+
+    // Handle the common ordered, not equal (LT|GT) case.
+    let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
+    let one_result = pos.func.dfg.first_result(one_inst);
+    pos.ins().jump(done, &[one_result]);
+
+    // Case 3) Unordered.
+    // We know that at least one operand is a NaN that needs to be propagated. We simply use an
+    // `fadd` instruction which has the same NaN propagation semantics.
+    pos.insert_ebb(uno_ebb);
+    let uno_result = pos.ins().fadd(x, y);
+    pos.ins().jump(done, &[uno_result]);
+
+    // Case 2) or 3).
+    pos.insert_ebb(ueq_ebb);
+    // Test for case 3) (UN) one value is NaN.
+    // TODO: When we get support for flag values, we can reuse the above comparison.
+    let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
+    pos.ins().brnz(cmp_uno, uno_ebb, &[]);
+
+    // We are now in case 2) where x and y compare EQ.
+    // We need a bitwise operation to get the sign right.
+    let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
+    let bw_result = pos.func.dfg.first_result(bw_inst);
+    // This should become a fall-through for this second most common case.
+    // Recycle the original instruction as a jump.
+    pos.func.dfg.replace(inst).jump(done, &[bw_result]);
+
+    // Finally insert a label for the completion.
+    pos.next_inst();
+    pos.insert_ebb(done);
+
+    cfg.recompute_ebb(pos.func, old_ebb);
+    cfg.recompute_ebb(pos.func, ueq_ebb);
+    cfg.recompute_ebb(pos.func, uno_ebb);
+    cfg.recompute_ebb(pos.func, done);
+}
+
+/// Expand `fcvt_from_uint`, which x86 cannot encode directly because it has no
+/// unsigned-to-float conversion.
+///
+/// An `i32` operand is zero-extended to `i64` and converted in place with `fcvt_from_sint`.
+/// Any other operand type is expanded into a branch on the sign bit: a value that is
+/// non-negative when read as signed goes straight through `fcvt_from_sint`, while a value with
+/// the top bit set is halved (keeping its LSB), converted, and doubled on the FP side. Both
+/// paths feed the original result value, which becomes the parameter of a new final EBB.
+///
+/// Panics if `inst` is not a unary `fcvt_from_uint`.
+fn expand_fcvt_from_uint(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &isa::TargetIsa,
+) {
+    use ir::condcodes::IntCC;
+
+    let x = match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtFromUint,
+            arg,
+        } => arg,
+        _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
+    };
+    let int_ty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let float_ty = func.dfg.value_type(result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Unsigned 32-bit input: widen it and let the signed 64-bit conversion do the work.
+    // TODO: This should be guarded by an ISA check.
+    if int_ty == ir::types::I32 {
+        let widened = pos.ins().uextend(ir::types::I64, x);
+        pos.func.dfg.replace(inst).fcvt_from_sint(float_ty, widened);
+        return;
+    }
+
+    // From here on the expansion splits the EBB containing `inst`.
+    let entry_ebb = pos.func.layout.pp_ebb(inst);
+
+    // Target of the branch taken when x, read as a signed integer, is negative.
+    let high_bit_ebb = pos.func.dfg.make_ebb();
+
+    // Join point; its single parameter carries the converted value.
+    let join_ebb = pos.func.dfg.make_ebb();
+
+    // The original result value is re-homed as the parameter of the join EBB.
+    pos.func.dfg.clear_results(inst);
+    pos.func.dfg.attach_ebb_param(join_ebb, result);
+
+    // Branch away when the sign bit is set; otherwise fall through to the signed conversion.
+    let top_bit_set = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
+    pos.ins().brnz(top_bit_set, high_bit_ebb, &[]);
+
+    // Fall-through: the value fits the signed range, so `fcvt_from_sint` is exact enough.
+    let direct = pos.ins().fcvt_from_sint(float_ty, x);
+    pos.ins().jump(join_ebb, &[direct]);
+
+    // Sign bit set: shift the value into signed range, OR the dropped LSB back in, convert,
+    // and scale back up by adding the float to itself.
+    pos.insert_ebb(high_bit_ebb);
+    let halved = pos.ins().ushr_imm(x, 1);
+    let low_bit = pos.ins().band_imm(x, 1);
+    let halved_with_lsb = pos.ins().bor(halved, low_bit);
+    let half_float = pos.ins().fcvt_from_sint(float_ty, halved_with_lsb);
+    let doubled = pos.ins().fadd(half_float, half_float);
+
+    // The original instruction is reused as the jump that ends this path.
+    pos.func.dfg.replace(inst).jump(join_ebb, &[doubled]);
+
+    // Everything after the recycled instruction now belongs to the join EBB.
+    pos.next_inst();
+    pos.insert_ebb(join_ebb);
+
+    for &ebb in [entry_ebb, high_bit_ebb, join_ebb].iter() {
+        cfg.recompute_ebb(pos.func, ebb);
+    }
+}
+
+/// Expand `fcvt_to_sint` into an `x86_cvtt2si` followed by explicit checks that trap on NaN
+/// and on out-of-range inputs.
+///
+/// `inst` is rewritten in place to `x86_cvtt2si`, so the original result value keeps its
+/// defining instruction. The checks are inserted after it and are only reached when the
+/// conversion produced the INT_MIN bit pattern; every other result branches straight to a new
+/// `done` EBB that holds the rest of the original EBB.
+///
+/// Traps emitted on the slow path:
+///
+/// - `BadConversionToInteger` when the input is NaN.
+/// - `IntegerOverflow` when the input is below the negative limit or is `>= 0`.
+///
+/// `_isa` is unused.
+///
+/// Panics if `inst` is not a unary `fcvt_to_sint`, or if its operand is neither `f32` nor
+/// `f64`.
+fn expand_fcvt_to_sint(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &isa::TargetIsa,
+) {
+    use ir::condcodes::{FloatCC, IntCC};
+    use ir::immediates::{Ieee32, Ieee64};
+
+    // Extract the floating point operand; anything but `fcvt_to_sint` is a caller bug.
+    let x;
+    match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtToSint,
+            arg,
+        } => x = arg,
+        _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
+    }
+    // Remember the EBB being split so its CFG edges can be recomputed at the end.
+    let old_ebb = func.layout.pp_ebb(inst);
+    // `xty` is the floating point source type, `ty` the integer result type.
+    let xty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    // Final EBB after the bad value checks.
+    let done = func.dfg.make_ebb();
+
+    // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
+    // It produces an INT_MIN result instead.
+    func.dfg.replace(inst).x86_cvtt2si(ty, x);
+
+    // All checks go after the conversion, since they inspect its result.
+    let mut pos = FuncCursor::new(func).after_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Fast path: any result other than INT_MIN (only the sign bit set) is known to be good.
+    let is_done = pos.ins().icmp_imm(
+        IntCC::NotEqual,
+        result,
+        1 << (ty.lane_bits() - 1),
+    );
+    pos.ins().brnz(is_done, done, &[]);
+
+    // We now have the following possibilities:
+    //
+    // 1. INT_MIN was actually the correct conversion result.
+    // 2. The input was NaN -> trap bad_toint
+    // 3. The input was out of range -> trap int_ovf
+    //
+
+    // Check for NaN.
+    // A value compares unordered with itself only when it is NaN.
+    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
+    pos.ins().trapnz(
+        is_nan,
+        ir::TrapCode::BadConversionToInteger,
+    );
+
+    // Check for case 1: INT_MIN is the correct result.
+    // Determine the smallest floating point number that would convert to INT_MIN.
+    //
+    // `overflow_cc` is the comparison used against that limit. It starts out as a strict `<`
+    // and is switched to `<=` inside the `if` expressions below when the limit constant comes
+    // from `fcvt_to_sint_negative_overflow` instead of being exactly -2^(N-1).
+    // NOTE(review): presumably that helper returns the first value that is already out of
+    // range, which is why the comparison becomes inclusive — confirm against its definition.
+    let mut overflow_cc = FloatCC::LessThan;
+    let output_bits = ty.lane_bits();
+    let flimit = match xty {
+        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
+        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+        ir::types::F32 => {
+            pos.ins().f32const(if output_bits < 32 {
+                overflow_cc = FloatCC::LessThanOrEqual;
+                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
+            } else {
+                Ieee32::pow2(output_bits - 1).neg()
+            })
+        }
+        ir::types::F64 => {
+            // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
+            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+            pos.ins().f64const(if output_bits < 64 {
+                overflow_cc = FloatCC::LessThanOrEqual;
+                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
+            } else {
+                Ieee64::pow2(output_bits - 1).neg()
+            })
+        }
+        _ => panic!("Can't convert {}", xty),
+    };
+    // Negative overflow: the input lies beyond the limit computed above.
+    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
+    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
+
+    // Finally, we could have a positive value that is too large.
+    // The result was INT_MIN, so a non-negative input cannot have converted correctly.
+    // An all-zero bit pattern is +0.0 for both float formats.
+    let fzero = match xty {
+        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
+        _ => panic!("Can't convert {}", xty),
+    };
+    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
+    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
+
+    // Only case 1 remains: INT_MIN was the right answer. Rejoin the fast path.
+    pos.ins().jump(done, &[]);
+    pos.insert_ebb(done);
+
+    // The old EBB gained a branch and a jump, and `done` is new: refresh both in the CFG.
+    cfg.recompute_ebb(pos.func, old_ebb);
+    cfg.recompute_ebb(pos.func, done);
+}
+
+/// Expand `fcvt_to_uint` in terms of the signed `x86_cvtt2si` conversion.
+///
+/// With N the bit width of the result type:
+///
+/// - Inputs below 2^(N-1) are converted directly; a negative signed result traps with
+///   `IntegerOverflow`.
+/// - Inputs at or above 2^(N-1) have 2^(N-1) subtracted before the conversion and added back
+///   to the integer result; a negative intermediate result traps with `IntegerOverflow`.
+/// - A NaN input traps with `BadConversionToInteger`.
+///
+/// The original result value becomes the parameter of a new final EBB that both paths reach.
+///
+/// Panics if `inst` is not a unary `fcvt_to_uint`, or if its operand is neither `f32` nor
+/// `f64`.
+fn expand_fcvt_to_uint(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &isa::TargetIsa,
+) {
+    use ir::condcodes::{FloatCC, IntCC};
+    use ir::immediates::{Ieee32, Ieee64};
+
+    let x = match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtToUint,
+            arg,
+        } => arg,
+        _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
+    };
+    let entry_ebb = func.layout.pp_ebb(inst);
+    let float_ty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let int_ty = func.dfg.value_type(result);
+
+    // Path for inputs that are >= 2^(N-1).
+    let high_ebb = func.dfg.make_ebb();
+
+    // Join point reached once all bad-value checks have passed.
+    let join_ebb = func.dfg.make_ebb();
+
+    // Re-home the original result value as the parameter of the join EBB.
+    func.dfg.clear_results(inst);
+    func.dfg.attach_ebb_param(join_ebb, result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Materialize 2^(N-1) as a constant of the input's floating point type.
+    let threshold = match float_ty {
+        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(int_ty.lane_bits() - 1)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(int_ty.lane_bits() - 1)),
+        _ => panic!("Can't convert {}", float_ty),
+    };
+    let cmp_flags = pos.ins().ffcmp(x, threshold);
+    pos.ins().brff(FloatCC::GreaterThanOrEqual, cmp_flags, high_ebb, &[]);
+
+    // A NaN input needs its own trap code; the flags from the comparison above already tell
+    // us whether the operands were unordered.
+    pos.ins().trapff(FloatCC::Unordered, cmp_flags, ir::TrapCode::BadConversionToInteger);
+
+    // Here x < 2^(N-1) and x is not NaN: convert as signed and reject negative results.
+    let low_int = pos.ins().x86_cvtt2si(int_ty, x);
+    let low_flags = pos.ins().ifcmp_imm(low_int, 0);
+    pos.ins().brif(IntCC::SignedGreaterThanOrEqual, low_flags, join_ebb, &[low_int]);
+    pos.ins().trap(ir::TrapCode::IntegerOverflow);
+
+    // Here x >= 2^(N-1) and x is not NaN: bias the input down into the signed range, convert,
+    // and undo the bias on the integer side.
+    pos.insert_ebb(high_ebb);
+    let reduced = pos.ins().fsub(x, threshold);
+    let high_int = pos.ins().x86_cvtt2si(int_ty, reduced);
+    let high_flags = pos.ins().ifcmp_imm(high_int, 0);
+    pos.ins().trapif(IntCC::SignedLessThan, high_flags, ir::TrapCode::IntegerOverflow);
+    let restored = pos.ins().iadd_imm(high_int, 1 << (int_ty.lane_bits() - 1));
+
+    // The original instruction is reused as the jump that ends this path.
+    pos.func.dfg.replace(inst).jump(join_ebb, &[restored]);
+
+    // Everything after the recycled instruction now belongs to the join EBB.
+    pos.next_inst();
+    pos.insert_ebb(join_ebb);
+
+    for &ebb in [entry_ebb, high_ebb, join_ebb].iter() {
+        cfg.recompute_ebb(pos.func, ebb);
+    }
+}
--- a/lib/codegen/src/isa/x86/mod.rs
+++ b/lib/codegen/src/isa/x86/mod.rs
@@ -0,0 +1,129 @@
+//! x86 Instruction Set Architectures.
+
+mod abi;
+mod binemit;
+mod enc_tables;
+mod registers;
+pub mod settings;
+
+use super::super::settings as shared_settings;
+use binemit::{emit_function, CodeSink, MemoryCodeSink};
+use ir;
+use isa::Builder as IsaBuilder;
+use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
+use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
+use regalloc;
+use result;
+use std::boxed::Box;
+use std::fmt;
+use timing;
+
+/// The x86 implementation of `TargetIsa`.
+// NOTE(review): every field is read elsewhere in this file, so this `allow` may be a
+// leftover — confirm before removing it.
+#[allow(dead_code)]
+struct Isa {
+    /// Target-independent settings; handed out by `flags()` and passed to the ABI helpers.
+    shared_flags: shared_settings::Flags,
+    /// x86-specific settings; their predicate view drives encoding selection.
+    isa_flags: settings::Flags,
+    /// Level-1 encoding table for the selected CPU mode (`LEVEL1_I64` or `LEVEL1_I32`).
+    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
+}
+
+/// Create the ISA builder used to configure and construct x86 targets.
+///
+/// The builder starts from the default x86 settings and constructs the target with
+/// `isa_constructor`.
+pub fn isa_builder() -> IsaBuilder {
+    let setup = settings::builder();
+    IsaBuilder {
+        setup,
+        constructor: isa_constructor,
+    }
+}
+
+/// Build the boxed x86 `TargetIsa` from the shared flags and the x86 settings builder.
+///
+/// The `is_64bit` shared flag selects which level-1 encoding table the ISA uses.
+fn isa_constructor(
+    shared_flags: shared_settings::Flags,
+    builder: &shared_settings::Builder,
+) -> Box<TargetIsa> {
+    let cpumode = if shared_flags.is_64bit() {
+        &enc_tables::LEVEL1_I64[..]
+    } else {
+        &enc_tables::LEVEL1_I32[..]
+    };
+    // The ISA flags are derived while `shared_flags` is still borrowed, before it is moved
+    // into the struct.
+    let isa_flags = settings::Flags::new(&shared_flags, builder);
+    Box::new(Isa {
+        shared_flags,
+        isa_flags,
+        cpumode,
+    })
+}
+
+// Most methods below are thin adapters that forward to the x86 `abi`, `binemit`,
+// `enc_tables` and `registers` modules.
+impl TargetIsa for Isa {
+    /// The name of this ISA: always `"x86"`.
+    fn name(&self) -> &'static str {
+        "x86"
+    }
+
+    /// The shared, target-independent flags this ISA was constructed with.
+    fn flags(&self) -> &shared_settings::Flags {
+        &self.shared_flags
+    }
+
+    /// Always `true` for x86.
+    fn uses_cpu_flags(&self) -> bool {
+        true
+    }
+
+    /// Returns a copy of the x86 register description, `registers::INFO`.
+    fn register_info(&self) -> RegInfo {
+        registers::INFO.clone()
+    }
+
+    /// Returns a copy of the x86 encoding description, `enc_tables::INFO`.
+    fn encoding_info(&self) -> EncInfo {
+        enc_tables::INFO.clone()
+    }
+
+    /// Look up the legal encodings for `inst` in `func`.
+    ///
+    /// The lookup starts in the level-1 table chosen for the CPU mode at construction time
+    /// and evaluates predicates against the x86-specific flags.
+    fn legal_encodings<'a>(
+        &'a self,
+        func: &'a ir::Function,
+        inst: &'a ir::InstructionData,
+        ctrl_typevar: ir::Type,
+    ) -> Encodings<'a> {
+        lookup_enclist(
+            ctrl_typevar,
+            inst,
+            func,
+            self.cpumode,
+            &enc_tables::LEVEL2[..],
+            &enc_tables::ENCLISTS[..],
+            &enc_tables::LEGALIZE_ACTIONS[..],
+            &enc_tables::RECIPE_PREDICATES[..],
+            &enc_tables::INST_PREDICATES[..],
+            self.isa_flags.predicate_view(),
+        )
+    }
+
+    /// Legalize `sig` for x86; forwards to `abi::legalize_signature` with the shared flags.
+    fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
+        abi::legalize_signature(sig, &self.shared_flags, current)
+    }
+
+    /// The register class used for ABI values of type `ty`; forwards to the `abi` module.
+    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
+        abi::regclass_for_abi_type(ty)
+    }
+
+    /// The set of registers available for allocation in `func`; forwards to the `abi` module
+    /// with the shared flags.
+    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
+        abi::allocatable_registers(func, &self.shared_flags)
+    }
+
+    /// Emit the binary encoding of a single instruction into `sink`.
+    fn emit_inst(
+        &self,
+        func: &ir::Function,
+        inst: ir::Inst,
+        divert: &mut regalloc::RegDiversions,
+        sink: &mut CodeSink,
+    ) {
+        binemit::emit_inst(func, inst, divert, sink)
+    }
+
+    /// Emit the whole function into `sink` by running the imported `emit_function` driver
+    /// with the x86 `emit_inst` as the per-instruction callback.
+    fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
+        emit_function(func, binemit::emit_inst, sink)
+    }
+
+    /// Insert the prologue and epilogues for `func`; forwards to `abi::prologue_epilogue`.
+    fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
+        // Bound to a name so the value stays alive until this function returns.
+        // NOTE(review): presumably a timing guard that records the pass duration when
+        // dropped — confirm in the `timing` module.
+        let _tt = timing::prologue_epilogue();
+        abi::prologue_epilogue(func, self)
+    }
+}
+
+impl fmt::Display for Isa {
+    /// Write the shared flags, a newline, and then the x86-specific flags.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(f, "{}", self.shared_flags)?;
+        write!(f, "{}", self.isa_flags)
+    }
+}
--- a/lib/codegen/src/isa/x86/registers.rs
+++ b/lib/codegen/src/isa/x86/registers.rs
@@ -0,0 +1,63 @@
+//! x86 register descriptions.
+
+use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
+
+include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use isa::RegUnit;
+    use std::string::{String, ToString};
+
+    /// Register names paired with their unit numbers.
+    ///
+    /// The encoding of integer registers is not alphabetical.
+    const UNITS: [(&str, RegUnit); 12] = [
+        ("rax", 0),
+        ("rbx", 3),
+        ("rcx", 1),
+        ("rdx", 2),
+        ("rsi", 6),
+        ("rdi", 7),
+        ("rbp", 5),
+        ("rsp", 4),
+        ("r8", 8),
+        ("r15", 15),
+        ("xmm0", 16),
+        ("xmm15", 31),
+    ];
+
+    /// Parsing a register name yields its unit number.
+    #[test]
+    fn unit_encodings() {
+        for &(name, unit) in UNITS.iter() {
+            assert_eq!(INFO.parse_regunit(name), Some(unit), "parsing {}", name);
+        }
+    }
+
+    /// Displaying a unit number yields the register name with a `%` prefix.
+    #[test]
+    fn unit_names() {
+        for &(name, unit) in UNITS.iter() {
+            let shown: String = INFO.display_regunit(unit).to_string();
+            assert_eq!(shown, format!("%{}", name));
+        }
+    }
+
+    /// Intersections between the GPR, ABCD and FPR register classes.
+    #[test]
+    fn regclasses() {
+        // Every class intersected with itself is that class.
+        for &rc in [GPR, ABCD, FPR].iter() {
+            assert_eq!(rc.intersect_index(rc), Some(rc.into()));
+        }
+
+        // GPR and ABCD intersect in ABCD, in either order.
+        assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
+        assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
+
+        // FPR has nothing in common with the integer classes.
+        for &int_rc in [GPR, ABCD].iter() {
+            assert_eq!(int_rc.intersect_index(FPR), None);
+            assert_eq!(FPR.intersect_index(int_rc), None);
+        }
+    }
+}
--- a/lib/codegen/src/isa/x86/settings.rs
+++ b/lib/codegen/src/isa/x86/settings.rs
@@ -0,0 +1,52 @@
+//! x86 Settings.
+
+use settings::{self, detail, Builder};
+use std::fmt;
+
+// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
+// `Flags` struct with an impl for all of the settings defined in
+// `lib/codegen/meta/isa/x86/settings.py`.
+include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
+
+#[cfg(test)]
+mod tests {
+    use super::{builder, Flags};
+    use settings::{self, Configurable};
+
+    /// Enabling a CPU preset must turn on exactly the features that CPU has.
+    #[test]
+    fn presets() {
+        let shared = settings::Flags::new(&settings::builder());
+
+        // Nehalem has SSE4.1 but not BMI1.
+        let mut b1 = builder();
+        b1.enable("nehalem").unwrap();
+        let f1 = Flags::new(&shared, &b1);
+        assert_eq!(f1.has_sse41(), true);
+        assert_eq!(f1.has_bmi1(), false);
+
+        // Haswell has both SSE4.1 and BMI1.
+        let mut b2 = builder();
+        b2.enable("haswell").unwrap();
+        let f2 = Flags::new(&shared, &b2);
+        assert_eq!(f2.has_sse41(), true);
+        assert_eq!(f2.has_bmi1(), true);
+    }
+
+    /// Formatting the flags must not panic, with or without a preset enabled.
+    #[test]
+    fn display_presets() {
+        // Spot check that the flags Display impl does not cause a panic
+        let shared = settings::Flags::new(&settings::builder());
+
+        // Default settings, no preset.
+        let b1 = builder();
+        let f1 = Flags::new(&shared, &b1);
+        let _ = format!("{}", f1);
+
+        // Each preset must be formatted from its own builder; building from `b1` here would
+        // only re-check the defaults.
+        let mut b2 = builder();
+        b2.enable("nehalem").unwrap();
+        let f2 = Flags::new(&shared, &b2);
+        let _ = format!("{}", f2);
+
+        let mut b3 = builder();
+        b3.enable("haswell").unwrap();
+        let f3 = Flags::new(&shared, &b3);
+        let _ = format!("{}", f3);
+    }
+}